forked from D-Net/dnet-hadoop
resolving conflicts
This commit is contained in:
commit
0628df7a3a
|
@ -211,7 +211,7 @@ public class GraphCleaningFunctions extends CleaningFunctions {
|
|||
.orElse(s.getValue()),
|
||||
Function.identity(),
|
||||
(s1, s2) -> Collections
|
||||
.min(Lists.newArrayList(s1, s1), new SubjectProvenanceComparator())))
|
||||
.min(Lists.newArrayList(s1, s2), new SubjectProvenanceComparator())))
|
||||
.values());
|
||||
r.setSubject(subjects);
|
||||
}
|
||||
|
|
|
@ -49,7 +49,7 @@ object DataciteToOAFTransformation {
|
|||
/** This method should skip record if json contains invalid text
|
||||
* defined in file datacite_filter
|
||||
*
|
||||
* @param record : unparsed datacite record
|
||||
* @param record : not parsed Datacite record
|
||||
* @param json : parsed record
|
||||
* @return True if the record should be skipped
|
||||
*/
|
||||
|
@ -98,6 +98,10 @@ object DataciteToOAFTransformation {
|
|||
|
||||
}
|
||||
|
||||
/** This utility method indicates whether the embargo date has been reached
|
||||
* @param embargo_end_date the embargo end date, expected in yyyy-MM-dd format
|
||||
* @return True if the embargo date has been reached, false otherwise
|
||||
*/
|
||||
def embargo_end(embargo_end_date: String): Boolean = {
|
||||
val dt = LocalDate.parse(embargo_end_date, DateTimeFormatter.ofPattern("[yyyy-MM-dd]"))
|
||||
val td = LocalDate.now()
|
||||
|
@ -142,6 +146,21 @@ object DataciteToOAFTransformation {
|
|||
}
|
||||
}
|
||||
|
||||
/** *
|
||||
* Use the vocabulary dnet:publication_resource to find a synonym to one of these terms and get the instance.type.
|
||||
* Using the dnet:result_typologies vocabulary, we look up the instance.type synonym
|
||||
* to generate one of the following main entities:
|
||||
* - publication
|
||||
* - dataset
|
||||
* - software
|
||||
* - otherresearchproduct
|
||||
*
|
||||
* @param resourceType
|
||||
* @param resourceTypeGeneral
|
||||
* @param schemaOrg
|
||||
* @param vocabularies
|
||||
* @return
|
||||
*/
|
||||
def getTypeQualifier(
|
||||
resourceType: String,
|
||||
resourceTypeGeneral: String,
|
||||
|
@ -330,6 +349,7 @@ object DataciteToOAFTransformation {
|
|||
if (result == null)
|
||||
return List()
|
||||
|
||||
// DOI is mapped on a PID inside an Instance object
|
||||
val doi_q = OafMapperUtils.qualifier(
|
||||
"doi",
|
||||
"doi",
|
||||
|
@ -338,6 +358,8 @@ object DataciteToOAFTransformation {
|
|||
)
|
||||
val pid = OafMapperUtils.structuredProperty(doi, doi_q, dataInfo)
|
||||
result.setPid(List(pid).asJava)
|
||||
|
||||
// This identifier will be replaced at a later stage using the PID generation logic
|
||||
result.setId(OafMapperUtils.createOpenaireId(50, s"datacite____::$doi", true))
|
||||
result.setOriginalId(List(doi).asJava)
|
||||
|
||||
|
@ -386,6 +408,10 @@ object DataciteToOAFTransformation {
|
|||
a
|
||||
}
|
||||
|
||||
if (authors == null || authors.isEmpty || !authors.exists(a => a != null))
|
||||
return List()
|
||||
result.setAuthor(authors.asJava)
|
||||
|
||||
val titles: List[TitleType] = (json \\ "titles").extractOrElse[List[TitleType]](List())
|
||||
|
||||
result.setTitle(
|
||||
|
@ -409,10 +435,6 @@ object DataciteToOAFTransformation {
|
|||
.asJava
|
||||
)
|
||||
|
||||
if (authors == null || authors.isEmpty || !authors.exists(a => a != null))
|
||||
return List()
|
||||
result.setAuthor(authors.asJava)
|
||||
|
||||
val dates = (json \\ "dates").extract[List[DateType]]
|
||||
val publication_year = (json \\ "publicationYear").extractOrElse[String](null)
|
||||
|
||||
|
|
|
@ -554,7 +554,7 @@ public class PublicationToOaf implements Serializable {
|
|||
private KeyValue createCollectedFrom() {
|
||||
KeyValue cf = new KeyValue();
|
||||
cf.setValue(ModelConstants.ORCID.toUpperCase());
|
||||
cf.setKey("10|" + OPENAIRE_PREFIX + SEPARATOR + "806360c771262b4d6770e7cdf04b5c5a");
|
||||
cf.setKey("10|" + OPENAIRE_PREFIX + SEPARATOR + "cd0f74b5955dc87fd0605745c4b49ee8");
|
||||
return cf;
|
||||
}
|
||||
|
||||
|
|
|
@ -134,11 +134,17 @@ public class ResultTagger implements Serializable {
|
|||
/* Tagging for Advanced Constraints */
|
||||
final Set<String> aconstraints = new HashSet<>();
|
||||
|
||||
conf.getSelectionConstraintsMap().keySet()
|
||||
conf
|
||||
.getSelectionConstraintsMap()
|
||||
.keySet()
|
||||
.forEach(communityId -> {
|
||||
if(conf.getSelectionConstraintsMap().get(communityId) != null &&
|
||||
conf.getSelectionConstraintsMap().get(communityId)
|
||||
.getCriteria().stream().anyMatch(crit -> crit.verifyCriteria(param)))
|
||||
if (conf.getSelectionConstraintsMap().get(communityId) != null &&
|
||||
conf
|
||||
.getSelectionConstraintsMap()
|
||||
.get(communityId)
|
||||
.getCriteria()
|
||||
.stream()
|
||||
.anyMatch(crit -> crit.verifyCriteria(param)))
|
||||
aconstraints.add(communityId);
|
||||
});
|
||||
|
||||
|
@ -152,7 +158,7 @@ public class ResultTagger implements Serializable {
|
|||
}
|
||||
|
||||
result.getContext().forEach(c -> {
|
||||
String cId = c.getId();
|
||||
final String cId = c.getId();
|
||||
if (communities.contains(cId)) {
|
||||
Optional<List<DataInfo>> opt_dataInfoList = Optional.ofNullable(c.getDataInfo());
|
||||
List<DataInfo> dataInfoList;
|
||||
|
@ -164,21 +170,48 @@ public class ResultTagger implements Serializable {
|
|||
}
|
||||
if (subjects.contains(cId))
|
||||
dataInfoList
|
||||
.add(OafMapperUtils.dataInfo(false, BULKTAG_DATA_INFO_TYPE, true, false,
|
||||
OafMapperUtils.qualifier(CLASS_ID_SUBJECT, CLASS_NAME_BULKTAG_SUBJECT, DNET_PROVENANCE_ACTIONS, DNET_PROVENANCE_ACTIONS), TAGGING_TRUST));
|
||||
.add(
|
||||
OafMapperUtils
|
||||
.dataInfo(
|
||||
false, BULKTAG_DATA_INFO_TYPE, true, false,
|
||||
OafMapperUtils
|
||||
.qualifier(
|
||||
CLASS_ID_SUBJECT, CLASS_NAME_BULKTAG_SUBJECT, DNET_PROVENANCE_ACTIONS,
|
||||
DNET_PROVENANCE_ACTIONS),
|
||||
TAGGING_TRUST));
|
||||
if (datasources.contains(cId))
|
||||
dataInfoList
|
||||
.add(OafMapperUtils.dataInfo(false, BULKTAG_DATA_INFO_TYPE, true, false,
|
||||
OafMapperUtils.qualifier(CLASS_ID_DATASOURCE, CLASS_NAME_BULKTAG_DATASOURCE, DNET_PROVENANCE_ACTIONS, DNET_PROVENANCE_ACTIONS), TAGGING_TRUST));
|
||||
.add(
|
||||
OafMapperUtils
|
||||
.dataInfo(
|
||||
false, BULKTAG_DATA_INFO_TYPE, true, false,
|
||||
OafMapperUtils
|
||||
.qualifier(
|
||||
CLASS_ID_DATASOURCE, CLASS_NAME_BULKTAG_DATASOURCE, DNET_PROVENANCE_ACTIONS,
|
||||
DNET_PROVENANCE_ACTIONS),
|
||||
TAGGING_TRUST));
|
||||
if (czenodo.contains(cId))
|
||||
dataInfoList
|
||||
.add(OafMapperUtils.dataInfo(false, BULKTAG_DATA_INFO_TYPE, true, false,
|
||||
OafMapperUtils.qualifier(CLASS_ID_CZENODO, CLASS_NAME_BULKTAG_ZENODO, DNET_PROVENANCE_ACTIONS, DNET_PROVENANCE_ACTIONS), TAGGING_TRUST));
|
||||
.add(
|
||||
OafMapperUtils
|
||||
.dataInfo(
|
||||
false, BULKTAG_DATA_INFO_TYPE, true, false,
|
||||
OafMapperUtils
|
||||
.qualifier(
|
||||
CLASS_ID_CZENODO, CLASS_NAME_BULKTAG_ZENODO, DNET_PROVENANCE_ACTIONS,
|
||||
DNET_PROVENANCE_ACTIONS),
|
||||
TAGGING_TRUST));
|
||||
if (aconstraints.contains(cId))
|
||||
dataInfoList
|
||||
.add(
|
||||
OafMapperUtils.dataInfo(false, BULKTAG_DATA_INFO_TYPE, true, false,
|
||||
OafMapperUtils.qualifier(CLASS_ID_ADVANCED_CONSTRAINT, CLASS_NAME_BULKTAG_ADVANCED_CONSTRAINT, DNET_PROVENANCE_ACTIONS, DNET_PROVENANCE_ACTIONS), TAGGING_TRUST));
|
||||
OafMapperUtils
|
||||
.dataInfo(
|
||||
false, BULKTAG_DATA_INFO_TYPE, true, false,
|
||||
OafMapperUtils
|
||||
.qualifier(
|
||||
CLASS_ID_ADVANCED_CONSTRAINT, CLASS_NAME_BULKTAG_ADVANCED_CONSTRAINT,
|
||||
DNET_PROVENANCE_ACTIONS, DNET_PROVENANCE_ACTIONS),
|
||||
TAGGING_TRUST));
|
||||
|
||||
}
|
||||
});
|
||||
|
@ -199,21 +232,48 @@ public class ResultTagger implements Serializable {
|
|||
List<DataInfo> dataInfoList = new ArrayList<>();
|
||||
if (subjects.contains(c))
|
||||
dataInfoList
|
||||
.add(OafMapperUtils.dataInfo(false, BULKTAG_DATA_INFO_TYPE, true, false,
|
||||
OafMapperUtils.qualifier(CLASS_ID_SUBJECT, CLASS_NAME_BULKTAG_SUBJECT, DNET_PROVENANCE_ACTIONS, DNET_PROVENANCE_ACTIONS), TAGGING_TRUST));
|
||||
.add(
|
||||
OafMapperUtils
|
||||
.dataInfo(
|
||||
false, BULKTAG_DATA_INFO_TYPE, true, false,
|
||||
OafMapperUtils
|
||||
.qualifier(
|
||||
CLASS_ID_SUBJECT, CLASS_NAME_BULKTAG_SUBJECT, DNET_PROVENANCE_ACTIONS,
|
||||
DNET_PROVENANCE_ACTIONS),
|
||||
TAGGING_TRUST));
|
||||
if (datasources.contains(c))
|
||||
dataInfoList
|
||||
.add(OafMapperUtils.dataInfo(false, BULKTAG_DATA_INFO_TYPE, true, false,
|
||||
OafMapperUtils.qualifier(CLASS_ID_DATASOURCE, CLASS_NAME_BULKTAG_DATASOURCE, DNET_PROVENANCE_ACTIONS, DNET_PROVENANCE_ACTIONS), TAGGING_TRUST));
|
||||
.add(
|
||||
OafMapperUtils
|
||||
.dataInfo(
|
||||
false, BULKTAG_DATA_INFO_TYPE, true, false,
|
||||
OafMapperUtils
|
||||
.qualifier(
|
||||
CLASS_ID_DATASOURCE, CLASS_NAME_BULKTAG_DATASOURCE,
|
||||
DNET_PROVENANCE_ACTIONS, DNET_PROVENANCE_ACTIONS),
|
||||
TAGGING_TRUST));
|
||||
if (czenodo.contains(c))
|
||||
dataInfoList
|
||||
.add(OafMapperUtils.dataInfo(false, BULKTAG_DATA_INFO_TYPE, true, false,
|
||||
OafMapperUtils.qualifier(CLASS_ID_CZENODO, CLASS_NAME_BULKTAG_ZENODO, DNET_PROVENANCE_ACTIONS, DNET_PROVENANCE_ACTIONS), TAGGING_TRUST));
|
||||
.add(
|
||||
OafMapperUtils
|
||||
.dataInfo(
|
||||
false, BULKTAG_DATA_INFO_TYPE, true, false,
|
||||
OafMapperUtils
|
||||
.qualifier(
|
||||
CLASS_ID_CZENODO, CLASS_NAME_BULKTAG_ZENODO, DNET_PROVENANCE_ACTIONS,
|
||||
DNET_PROVENANCE_ACTIONS),
|
||||
TAGGING_TRUST));
|
||||
if (aconstraints.contains(c))
|
||||
dataInfoList
|
||||
.add(
|
||||
OafMapperUtils.dataInfo(false, BULKTAG_DATA_INFO_TYPE, true, false,
|
||||
OafMapperUtils.qualifier(CLASS_ID_ADVANCED_CONSTRAINT, CLASS_NAME_BULKTAG_ADVANCED_CONSTRAINT, DNET_PROVENANCE_ACTIONS, DNET_PROVENANCE_ACTIONS), TAGGING_TRUST));
|
||||
OafMapperUtils
|
||||
.dataInfo(
|
||||
false, BULKTAG_DATA_INFO_TYPE, true, false,
|
||||
OafMapperUtils
|
||||
.qualifier(
|
||||
CLASS_ID_ADVANCED_CONSTRAINT, CLASS_NAME_BULKTAG_ADVANCED_CONSTRAINT,
|
||||
DNET_PROVENANCE_ACTIONS, DNET_PROVENANCE_ACTIONS),
|
||||
TAGGING_TRUST));
|
||||
|
||||
context.setDataInfo(dataInfoList);
|
||||
return context;
|
||||
|
|
|
@ -3,7 +3,7 @@ package eu.dnetlib.dhp.bulktag.criteria;
|
|||
|
||||
import java.io.Serializable;
|
||||
|
||||
@VerbClass("contains_ignorecase")
|
||||
@VerbClass("contains_caseinsensitive")
|
||||
public class ContainsVerbIgnoreCase implements Selection, Serializable {
|
||||
|
||||
private String param;
|
||||
|
|
|
@ -3,7 +3,7 @@ package eu.dnetlib.dhp.bulktag.criteria;
|
|||
|
||||
import java.io.Serializable;
|
||||
|
||||
@VerbClass("equals_ignorecase")
|
||||
@VerbClass("equals_caseinsensitive")
|
||||
public class EqualVerbIgnoreCase implements Selection, Serializable {
|
||||
|
||||
private String param;
|
||||
|
|
|
@ -3,7 +3,7 @@ package eu.dnetlib.dhp.bulktag.criteria;
|
|||
|
||||
import java.io.Serializable;
|
||||
|
||||
@VerbClass("not_contains_ignorecase")
|
||||
@VerbClass("not_contains_caseinsensitive")
|
||||
public class NotContainsVerbIgnoreCase implements Selection, Serializable {
|
||||
|
||||
private String param;
|
||||
|
|
|
@ -3,7 +3,7 @@ package eu.dnetlib.dhp.bulktag.criteria;
|
|||
|
||||
import java.io.Serializable;
|
||||
|
||||
@VerbClass("not_equals_ignorecase")
|
||||
@VerbClass("not_equals_caseinsensitive")
|
||||
public class NotEqualVerbIgnoreCase implements Selection, Serializable {
|
||||
|
||||
private String param;
|
||||
|
|
|
@ -1193,7 +1193,7 @@
|
|||
<organizations/>
|
||||
</community>
|
||||
<community id="science-innovation-policy">
|
||||
<advancedConstraints>{"criteria":[{"constraint":[{"verb":"equals_ignorecase","field":"subject","value":"ciencias de la comunicación"},
|
||||
<advancedConstraints>{"criteria":[{"constraint":[{"verb":"equals_caseinsensitive","field":"subject","value":"ciencias de la comunicación"},
|
||||
{"verb":"equals","field":"subject","value":"Miriam"}]},
|
||||
{"constraint":[{"verb":"equals","field":"subject","value":"miriam"}]}]}</advancedConstraints>
|
||||
<subjects>
|
||||
|
@ -1317,81 +1317,81 @@
|
|||
<datasources>
|
||||
<datasource>
|
||||
<openaireId>opendoar____::358aee4cc897452c00244351e4d91f69</openaireId>
|
||||
<selcriteria>{"criteria":[{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"COVID-19"}]},
|
||||
{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"SARS-CoV-2"}]},
|
||||
{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"2019-nCoV"}}]}
|
||||
<selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]},
|
||||
{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]},
|
||||
{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCoV"}]}]}
|
||||
</selcriteria>
|
||||
</datasource>
|
||||
<datasource>
|
||||
<openaireId>re3data_____::7b0ad08687b2c960d5aeef06f811d5e6</openaireId>
|
||||
<selcriteria>{"criteria":[{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"COVID-19"}]},
|
||||
{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"SARS-CoV-2"}]},
|
||||
{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"2019-nCoV"}]}]}
|
||||
<selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]},
|
||||
{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]},
|
||||
{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCoV"}]}]}
|
||||
</selcriteria>
|
||||
</datasource>
|
||||
<datasource>
|
||||
<openaireId>driver______::bee53aa31dc2cbb538c10c2b65fa5824</openaireId>
|
||||
<selcriteria>{"criteria":[{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"COVID-19"}]},
|
||||
{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"SARS-CoV-2"}]},
|
||||
{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"2019-nCoV"}]}]}
|
||||
<selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]},
|
||||
{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]},
|
||||
{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCoV"}]}]}
|
||||
</selcriteria>
|
||||
</datasource>
|
||||
<datasource>
|
||||
<openaireId>openaire____::437f4b072b1aa198adcbc35910ff3b98</openaireId>
|
||||
<selcriteria>{"criteria":[{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"COVID-19"}]},
|
||||
{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"SARS-CoV-2"}]},
|
||||
{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"2019-nCoV"}]}]}
|
||||
<selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]},
|
||||
{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]},
|
||||
{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCoV"}]}]}
|
||||
</selcriteria>
|
||||
</datasource>
|
||||
<datasource>
|
||||
<openaireId>openaire____::081b82f96300b6a6e3d282bad31cb6e2</openaireId>
|
||||
<selcriteria>{"criteria":[{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"COVID-19"}]},
|
||||
{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"SARS-CoV-2"}]},
|
||||
{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"2019-nCoV"}]}]}
|
||||
<selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]},
|
||||
{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]},
|
||||
{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCoV"}]}]}
|
||||
</selcriteria>
|
||||
</datasource>
|
||||
<datasource>
|
||||
<openaireId>openaire____::9e3be59865b2c1c335d32dae2fe7b254</openaireId>
|
||||
<selcriteria>{"criteria":[{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"COVID-19"}]},
|
||||
{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"SARS-CoV-2"}]},
|
||||
{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"2019-nCoV"}]}]}
|
||||
<selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]},
|
||||
{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]},
|
||||
{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCoV"}]}]}
|
||||
</selcriteria>
|
||||
</datasource>
|
||||
<datasource>
|
||||
<openaireId>opendoar____::8b6dd7db9af49e67306feb59a8bdc52c</openaireId>
|
||||
<selcriteria>{"criteria":[{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"COVID-19"}]},
|
||||
{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"SARS-CoV-2"}]},
|
||||
{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"2019-nCoV"}]}]}
|
||||
<selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]},
|
||||
{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]},
|
||||
{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCoV"}]}]}
|
||||
</selcriteria>
|
||||
</datasource>
|
||||
<datasource>
|
||||
<openaireId>share_______::4719356ec8d7d55d3feb384ce879ad6c</openaireId>
|
||||
<selcriteria>{"criteria":[{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"COVID-19"}]},
|
||||
{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"SARS-CoV-2"}]},
|
||||
{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"2019-nCoV"}]}]}
|
||||
<selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]},
|
||||
{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]},
|
||||
{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCoV"}]}]}
|
||||
</selcriteria>
|
||||
</datasource>
|
||||
<datasource>
|
||||
<openaireId>share_______::bbd802baad85d1fd440f32a7a3a2c2b1</openaireId>
|
||||
<selcriteria>{"criteria":[{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"COVID-19"}]},
|
||||
{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"SARS-CoV-2"}]},
|
||||
{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"2019-nCoV"}]}]}
|
||||
<selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]},
|
||||
{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]},
|
||||
{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCoV"}]}]}
|
||||
</selcriteria>
|
||||
</datasource>
|
||||
<datasource>
|
||||
<openaireId>opendoar____::6f4922f45568161a8cdf4ad2299f6d23</openaireId>
|
||||
<selcriteria>{"criteria":[{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"COVID-19"}]},
|
||||
{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"SARS-CoV-2"}]},
|
||||
{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"2019-nCoV"}]}]}
|
||||
<selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]},
|
||||
{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]},
|
||||
{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCoV"}]}]}
|
||||
</selcriteria>
|
||||
</datasource>
|
||||
<datasource>
|
||||
<openaireId>re3data_____::7980778c78fb4cf0fab13ce2159030dc</openaireId>
|
||||
<selcriteria>{"criteria":[{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"SARS-CoV-2"}]},{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"COVID-19"}]},{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"2019-nCov"}]}]}</selcriteria>
|
||||
<selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]},{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]},{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCov"}]}]}</selcriteria>
|
||||
</datasource>
|
||||
<datasource>
|
||||
<openaireId>re3data_____::978378def740bbf2bfb420de868c460b</openaireId>
|
||||
<selcriteria>{"criteria":[{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"SARS-CoV-2"}]},{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"COVID-19"}]},{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"2019-nCov"}]}]}</selcriteria>
|
||||
<selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]},{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]},{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCov"}]}]}</selcriteria>
|
||||
</datasource>
|
||||
</datasources>
|
||||
<zenodocommunities>
|
||||
|
|
|
@ -0,0 +1,25 @@
|
|||
# Root logger option
|
||||
log4j.rootLogger=DEBUG, stdout
|
||||
|
||||
# Direct log messages to stdout
|
||||
log4j.appender.stdout=org.apache.log4j.ConsoleAppender
|
||||
log4j.appender.stdout.Target=System.out
|
||||
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
|
||||
log4j.appender.stdout.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss} %-5p %c{1}:%L - %m%n
|
||||
|
||||
# Change this to set Spark log level
|
||||
log4j.logger.org.apache.spark=ERROR
|
||||
log4j.rootCategory=WARN
|
||||
|
||||
# Silence akka remoting
|
||||
log4j.logger.Remoting=WARN
|
||||
|
||||
# Ignore messages below warning level from Jetty, because it's a bit verbose
|
||||
log4j.logger.org.eclipse.jetty=WARN
|
||||
|
||||
log4j.logger.org.apache.hadoop.mapreduce.lib.output.FileOutputCommitterFactory=WARN
|
||||
log4j.logger.org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter=WARN
|
||||
log4j.logger.org.apache.parquet.hadoop.ParquetOutputFormat=WARN
|
||||
log4j.logger.org.apache.parquet.hadoop.InternalParquetRecordWriter=WARN
|
||||
log4j.logger.org.apache.hadoop.io.compress.CodecPool=WARN
|
||||
log4j.logger.org.apache.parquet.hadoop.codec.CodecConfig=WARN
|
|
@ -3,6 +3,7 @@ package eu.dnetlib.dhp.oa.graph.clean;
|
|||
|
||||
import java.io.Serializable;
|
||||
import java.util.HashMap;
|
||||
import java.util.Objects;
|
||||
import java.util.concurrent.atomic.AtomicReference;
|
||||
|
||||
import org.apache.commons.lang3.SerializationUtils;
|
||||
|
@ -10,6 +11,7 @@ import org.apache.commons.lang3.StringUtils;
|
|||
|
||||
import eu.dnetlib.dhp.common.FunctionalInterfaceSupport.SerializableConsumer;
|
||||
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
|
||||
import eu.dnetlib.dhp.common.vocabulary.VocabularyTerm;
|
||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||
import eu.dnetlib.dhp.schema.oaf.*;
|
||||
|
||||
|
@ -31,29 +33,30 @@ public class CleaningRuleMap extends HashMap<Class<?>, SerializableConsumer<Obje
|
|||
}
|
||||
|
||||
private static void cleanSubject(VocabularyGroup vocabularies, Subject subject) {
|
||||
if (cleanSubjectForVocabulary(ModelConstants.DNET_SUBJECT_FOS_CLASSID, vocabularies, subject)) {
|
||||
return;
|
||||
} else {
|
||||
cleanSubjectForVocabulary(ModelConstants.DNET_SUBJECT_FOS_CLASSID, vocabularies, subject);
|
||||
// TODO cleaning based on different subject vocabs can be added here
|
||||
}
|
||||
}
|
||||
|
||||
private static boolean cleanSubjectForVocabulary(String vocabularyId, VocabularyGroup vocabularies,
|
||||
private static void cleanSubjectForVocabulary(String vocabularyId, VocabularyGroup vocabularies,
|
||||
Subject subject) {
|
||||
AtomicReference<Boolean> modified = new AtomicReference<>(false);
|
||||
|
||||
vocabularies.find(vocabularyId).ifPresent(vocabulary -> {
|
||||
if (!ModelConstants.DNET_SUBJECT_KEYWORD.equalsIgnoreCase(subject.getQualifier().getClassid())) {
|
||||
return;
|
||||
}
|
||||
if (ModelConstants.DNET_SUBJECT_KEYWORD.equalsIgnoreCase(subject.getQualifier().getClassid())) {
|
||||
Qualifier newValue = vocabulary.lookup(subject.getValue());
|
||||
if (!ModelConstants.UNKNOWN.equals(newValue.getClassid())) {
|
||||
subject.setValue(newValue.getClassid());
|
||||
subject.getQualifier().setClassid(vocabularyId);
|
||||
subject.getQualifier().setClassname(vocabulary.getName());
|
||||
modified.set(true);
|
||||
}
|
||||
} else if (vocabularyId.equals(subject.getQualifier().getClassid())) {
|
||||
Qualifier syn = vocabulary.getSynonymAsQualifier(subject.getValue());
|
||||
VocabularyTerm term = vocabulary.getTerm(subject.getValue());
|
||||
if (Objects.isNull(syn) && Objects.isNull(term)) {
|
||||
subject.getQualifier().setClassid(ModelConstants.DNET_SUBJECT_KEYWORD);
|
||||
subject.getQualifier().setClassname(ModelConstants.DNET_SUBJECT_KEYWORD);
|
||||
}
|
||||
}
|
||||
});
|
||||
return modified.get();
|
||||
}
|
||||
|
||||
private static void cleanRelation(VocabularyGroup vocabularies, Relation r) {
|
||||
|
|
|
@ -43,7 +43,7 @@ public class CleanCountrySparkJob implements Serializable {
|
|||
|
||||
String jsonConfiguration = IOUtils
|
||||
.toString(
|
||||
CleanContextSparkJob.class
|
||||
CleanCountrySparkJob.class
|
||||
.getResourceAsStream(
|
||||
"/eu/dnetlib/dhp/oa/graph/input_clean_country_parameters.json"));
|
||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
|
||||
|
@ -117,7 +117,7 @@ public class CleanCountrySparkJob implements Serializable {
|
|||
p -> p
|
||||
.getQualifier()
|
||||
.getClassid()
|
||||
.equals(PidType.doi) && pidInParam(p.getValue(), verifyParam))) {
|
||||
.equals(PidType.doi.toString()) && pidInParam(p.getValue(), verifyParam))) {
|
||||
r
|
||||
.setCountry(
|
||||
r
|
||||
|
|
|
@ -65,7 +65,6 @@ public class GetDatasourceFromCountry implements Serializable {
|
|||
conf,
|
||||
isSparkSessionManaged,
|
||||
spark -> {
|
||||
|
||||
getDatasourceFromCountry(spark, country, inputPath, workingPath);
|
||||
});
|
||||
}
|
||||
|
@ -83,7 +82,6 @@ public class GetDatasourceFromCountry implements Serializable {
|
|||
(FilterFunction<Organization>) o -> !o.getDataInfo().getDeletedbyinference() &&
|
||||
o.getCountry().getClassid().length() > 0 &&
|
||||
o.getCountry().getClassid().equals(country));
|
||||
;
|
||||
|
||||
// filtering of the relations taking the non deleted by inference and those with IsProvidedBy as relclass
|
||||
Dataset<Relation> relation = spark
|
||||
|
@ -97,7 +95,7 @@ public class GetDatasourceFromCountry implements Serializable {
|
|||
!rel.getDataInfo().getDeletedbyinference());
|
||||
|
||||
organization
|
||||
.joinWith(relation, organization.col("id").equalTo(relation.col("target")), "left")
|
||||
.joinWith(relation, organization.col("id").equalTo(relation.col("target")))
|
||||
.map((MapFunction<Tuple2<Organization, Relation>, String>) t2 -> t2._2().getSource(), Encoders.STRING())
|
||||
.write()
|
||||
.mode(SaveMode.Overwrite)
|
||||
|
|
|
@ -366,6 +366,7 @@ public abstract class AbstractMdRecordToOafMapper {
|
|||
|
||||
r.setInstance(instances);
|
||||
r.setBestaccessright(OafMapperUtils.createBestAccessRights(instances));
|
||||
r.setEoscifguidelines(prepareEOSCIfGuidelines(doc, info));
|
||||
}
|
||||
|
||||
protected abstract List<StructuredProperty> prepareResultPids(Document doc, DataInfo info);
|
||||
|
@ -384,6 +385,25 @@ public abstract class AbstractMdRecordToOafMapper {
|
|||
return list;
|
||||
}
|
||||
|
||||
private List<EoscIfGuidelines> prepareEOSCIfGuidelines(Document doc, DataInfo info) {
|
||||
final Set<EoscIfGuidelines> set = Sets.newHashSet();
|
||||
for (final Object o : doc.selectNodes("//oaf:eoscifguidelines")) {
|
||||
final String code = ((Node) o).valueOf("@code");
|
||||
final String label = ((Node) o).valueOf("@label");
|
||||
final String url = ((Node) o).valueOf("@url");
|
||||
final String semrel = ((Node) o).valueOf("@semanticrelation");
|
||||
if (StringUtils.isNotBlank(code)) {
|
||||
final EoscIfGuidelines eig = new EoscIfGuidelines();
|
||||
eig.setCode(code);
|
||||
eig.setLabel(label);
|
||||
eig.setUrl(url);
|
||||
eig.setSemanticRelation(semrel);
|
||||
set.add(eig);
|
||||
}
|
||||
}
|
||||
return Lists.newArrayList(set);
|
||||
}
|
||||
|
||||
protected abstract Qualifier prepareResourceType(Document doc, DataInfo info);
|
||||
|
||||
protected abstract List<Instance> prepareInstances(
|
||||
|
|
|
@ -177,6 +177,9 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
|
|||
for (final Object o : doc.selectNodes("//*[local-name()='identifier' and ./@identifierType='landingPage']")) {
|
||||
url.add(trimAndDecodeUrl(((Node) o).getText().trim()));
|
||||
}
|
||||
for (final Object o : doc.selectNodes("//*[local-name()='identifier' and ./@identifierType='w3id']")) {
|
||||
url.add(trimAndDecodeUrl(((Node) o).getText().trim()));
|
||||
}
|
||||
|
||||
Set<String> validUrl = validateUrl(url);
|
||||
|
||||
|
|
|
@ -432,14 +432,14 @@
|
|||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<join name="wait_clean_context" to="getHostedby"/>
|
||||
<join name="wait_clean_context" to="select_datasourceId_from_country"/>
|
||||
|
||||
|
||||
<action name="getHostedby">
|
||||
<action name="select_datasourceId_from_country">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>Clean publications context</name>
|
||||
<name>Select datasource ID from country</name>
|
||||
<class>eu.dnetlib.dhp.oa.graph.clean.country.GetDatasourceFromCountry</class>
|
||||
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
|
@ -471,7 +471,7 @@
|
|||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>Clean publications counmtry</name>
|
||||
<name>Clean publication country</name>
|
||||
<class>eu.dnetlib.dhp.oa.graph.clean.country.CleanCountrySparkJob</class>
|
||||
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
|
@ -489,10 +489,10 @@
|
|||
<arg>--workingPath</arg><arg>${workingDir}/working/publication</arg>
|
||||
<arg>--country</arg><arg>${country}</arg>
|
||||
<arg>--verifyParam</arg><arg>${verifyCountryParam}</arg>
|
||||
<arg>--datasourcePath</arg><arg>${workingDir}/working/hostedby</arg>
|
||||
<arg>--hostedBy</arg><arg>${workingDir}/working/hostedby</arg>
|
||||
<arg>--collectedfrom</arg><arg>${collectedfrom}</arg>
|
||||
</spark>
|
||||
<ok to="wait_clean_context"/>
|
||||
<ok to="wait_clean_country"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
|
@ -500,7 +500,7 @@
|
|||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>Clean datasets Country</name>
|
||||
<name>Clean dataset country</name>
|
||||
<class>eu.dnetlib.dhp.oa.graph.clean.country.CleanCountrySparkJob</class>
|
||||
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
|
@ -518,10 +518,10 @@
|
|||
<arg>--workingPath</arg><arg>${workingDir}/working/dataset</arg>
|
||||
<arg>--country</arg><arg>${country}</arg>
|
||||
<arg>--verifyParam</arg><arg>${verifyCountryParam}</arg>
|
||||
<arg>--datasourcePath</arg><arg>${workingDir}/working/hostedby</arg>
|
||||
<arg>--hostedBy</arg><arg>${workingDir}/working/hostedby</arg>
|
||||
<arg>--collectedfrom</arg><arg>${collectedfrom}</arg>
|
||||
</spark>
|
||||
<ok to="wait_clean_context"/>
|
||||
<ok to="wait_clean_country"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
|
@ -529,7 +529,7 @@
|
|||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>Clean otherresearchproducts country</name>
|
||||
<name>Clean otherresearchproduct country</name>
|
||||
<class>eu.dnetlib.dhp.oa.graph.clean.country.CleanCountrySparkJob</class>
|
||||
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
|
@ -547,10 +547,10 @@
|
|||
<arg>--workingPath</arg><arg>${workingDir}/working/otherresearchproduct</arg>
|
||||
<arg>--country</arg><arg>${country}</arg>
|
||||
<arg>--verifyParam</arg><arg>${verifyCountryParam}</arg>
|
||||
<arg>--datasourcePath</arg><arg>${workingDir}/working/hostedby</arg>
|
||||
<arg>--hostedBy</arg><arg>${workingDir}/working/hostedby</arg>
|
||||
<arg>--collectedfrom</arg><arg>${collectedfrom}</arg>
|
||||
</spark>
|
||||
<ok to="wait_clean_context"/>
|
||||
<ok to="wait_clean_country"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
|
@ -558,7 +558,7 @@
|
|||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>Clean softwares country</name>
|
||||
<name>Clean software country</name>
|
||||
<class>eu.dnetlib.dhp.oa.graph.clean.country.CleanCountrySparkJob</class>
|
||||
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
|
@ -576,7 +576,7 @@
|
|||
<arg>--workingPath</arg><arg>${workingDir}/working/software</arg>
|
||||
<arg>--country</arg><arg>${country}</arg>
|
||||
<arg>--verifyParam</arg><arg>${verifyCountryParam}</arg>
|
||||
<arg>--datasourcePath</arg><arg>${workingDir}/working/hostedby</arg>
|
||||
<arg>--hostedBy</arg><arg>${workingDir}/working/hostedby</arg>
|
||||
<arg>--collectedfrom</arg><arg>${collectedfrom}</arg>
|
||||
</spark>
|
||||
<ok to="wait_clean_country"/>
|
||||
|
|
|
@ -126,6 +126,7 @@
|
|||
<arg>--hiveDbName</arg><arg>${hiveDbName}</arg>
|
||||
<arg>--className</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
|
||||
<arg>--hiveMetastoreUris</arg><arg>${hiveMetastoreUris}</arg>
|
||||
<arg>--numPartitions</arg><arg>8000</arg>
|
||||
</spark>
|
||||
<ok to="join_import"/>
|
||||
<error to="Kill"/>
|
||||
|
@ -152,6 +153,7 @@
|
|||
<arg>--hiveDbName</arg><arg>${hiveDbName}</arg>
|
||||
<arg>--className</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
|
||||
<arg>--hiveMetastoreUris</arg><arg>${hiveMetastoreUris}</arg>
|
||||
<arg>--numPartitions</arg><arg>4000</arg>
|
||||
</spark>
|
||||
<ok to="join_import"/>
|
||||
<error to="Kill"/>
|
||||
|
@ -178,6 +180,7 @@
|
|||
<arg>--hiveDbName</arg><arg>${hiveDbName}</arg>
|
||||
<arg>--className</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
|
||||
<arg>--hiveMetastoreUris</arg><arg>${hiveMetastoreUris}</arg>
|
||||
<arg>--numPartitions</arg><arg>3000</arg>
|
||||
</spark>
|
||||
<ok to="join_import"/>
|
||||
<error to="Kill"/>
|
||||
|
@ -204,6 +207,7 @@
|
|||
<arg>--hiveDbName</arg><arg>${hiveDbName}</arg>
|
||||
<arg>--className</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
|
||||
<arg>--hiveMetastoreUris</arg><arg>${hiveMetastoreUris}</arg>
|
||||
<arg>--numPartitions</arg><arg>300</arg>
|
||||
</spark>
|
||||
<ok to="join_import"/>
|
||||
<error to="Kill"/>
|
||||
|
@ -230,6 +234,7 @@
|
|||
<arg>--hiveDbName</arg><arg>${hiveDbName}</arg>
|
||||
<arg>--className</arg><arg>eu.dnetlib.dhp.schema.oaf.Datasource</arg>
|
||||
<arg>--hiveMetastoreUris</arg><arg>${hiveMetastoreUris}</arg>
|
||||
<arg>--numPartitions</arg><arg>100</arg>
|
||||
</spark>
|
||||
<ok to="join_import"/>
|
||||
<error to="Kill"/>
|
||||
|
@ -256,6 +261,7 @@
|
|||
<arg>--hiveDbName</arg><arg>${hiveDbName}</arg>
|
||||
<arg>--className</arg><arg>eu.dnetlib.dhp.schema.oaf.Organization</arg>
|
||||
<arg>--hiveMetastoreUris</arg><arg>${hiveMetastoreUris}</arg>
|
||||
<arg>--numPartitions</arg><arg>400</arg>
|
||||
</spark>
|
||||
<ok to="join_import"/>
|
||||
<error to="Kill"/>
|
||||
|
@ -309,6 +315,7 @@
|
|||
<arg>--hiveDbName</arg><arg>${hiveDbName}</arg>
|
||||
<arg>--className</arg><arg>eu.dnetlib.dhp.schema.oaf.Relation</arg>
|
||||
<arg>--hiveMetastoreUris</arg><arg>${hiveMetastoreUris}</arg>
|
||||
<arg>--numPartitions</arg><arg>10000</arg>
|
||||
</spark>
|
||||
<ok to="join_import"/>
|
||||
<error to="Kill"/>
|
||||
|
|
|
@ -116,54 +116,45 @@ object SparkConvertRDDtoDataset {
|
|||
.map(s => mapper.readValue(s, classOf[Relation]))
|
||||
.filter(r => r.getDataInfo != null && !r.getDataInfo.getDeletedbyinference)
|
||||
.filter(r => r.getSource.startsWith("50") && r.getTarget.startsWith("50"))
|
||||
.filter(r => filterRelations(subRelTypeFilter, relClassFilter, r))
|
||||
.filter(r => filterRelations(r))
|
||||
//filter OpenCitations relations
|
||||
.filter(r =>
|
||||
r.getDataInfo.getProvenanceaction != null &&
|
||||
!"sysimport:crosswalk:opencitations".equals(r.getDataInfo.getProvenanceaction.getClassid)
|
||||
)
|
||||
// .filter(r =>
|
||||
// r.getDataInfo.getProvenanceaction != null &&
|
||||
// !"sysimport:crosswalk:opencitations".equals(r.getDataInfo.getProvenanceaction.getClassid)
|
||||
// )
|
||||
|
||||
spark.createDataset(rddRelation).as[Relation].write.mode(SaveMode.Overwrite).save(s"$relPath")
|
||||
}
|
||||
|
||||
private def filterRelations(subRelTypeFilter: String, relClassFilter: List[String], r: Relation): Boolean = {
|
||||
if (StringUtils.isNotBlank(subRelTypeFilter)) {
|
||||
subRelTypeFilter.equalsIgnoreCase(r.getSubRelType)
|
||||
} else {
|
||||
!relClassFilter.exists(k => k.equalsIgnoreCase(r.getRelClass))
|
||||
}
|
||||
}
|
||||
private def filterRelations(r: Relation): Boolean = {
|
||||
|
||||
/*
|
||||
//TODO: finalise implementation
|
||||
private def processResult[T<: Result](
|
||||
implicit ct: ClassTag[T],
|
||||
log: Logger,
|
||||
spark: SparkSession,
|
||||
sourcePath: String,
|
||||
entityPath: String,
|
||||
clazz: Class[T]
|
||||
): Unit = {
|
||||
val entityType = clazz.getSimpleName.toLowerCase
|
||||
|
||||
log.info(s"Converting $entityType")
|
||||
|
||||
val mapper = new ObjectMapper() with ScalaObjectMapper
|
||||
mapper.registerModule(DefaultScalaModule)
|
||||
|
||||
val rdd = spark.sparkContext
|
||||
.textFile(s"$sourcePath/$entityType")
|
||||
.map(s => mapper.readValue(s, clazz))
|
||||
.filter(r => r.getDataInfo != null && !r.getDataInfo.getDeletedbyinference);
|
||||
|
||||
implicit val encoder: Encoder[T] = Encoders.kryo(clazz)
|
||||
spark
|
||||
.createDataset(rdd)
|
||||
.as[T]
|
||||
.write
|
||||
.mode(SaveMode.Overwrite)
|
||||
.save(s"$entityPath/$entityType")
|
||||
}
|
||||
/** *
|
||||
* We filter out the relations generated by dedup
|
||||
* and all the relations whose single collectedFrom is OpenCitations
|
||||
*/
|
||||
|
||||
val relClassFilter = List(
|
||||
ModelConstants.MERGES,
|
||||
ModelConstants.IS_MERGED_IN,
|
||||
ModelConstants.HAS_AMONG_TOP_N_SIMILAR_DOCS,
|
||||
ModelConstants.IS_AMONG_TOP_N_SIMILAR_DOCS
|
||||
)
|
||||
if (relClassFilter.exists(k => k.equalsIgnoreCase(r.getRelClass)))
|
||||
false
|
||||
else {
|
||||
if (r.getCollectedfrom == null || r.getCollectedfrom.size() == 0)
|
||||
false
|
||||
else if (r.getCollectedfrom.size() > 1)
|
||||
true
|
||||
else if (
|
||||
r.getCollectedfrom.size() == 1 && r.getCollectedfrom.get(0) != null && "OpenCitations".equalsIgnoreCase(
|
||||
r.getCollectedfrom.get(0).getValue
|
||||
)
|
||||
)
|
||||
false
|
||||
else
|
||||
true
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -278,6 +278,16 @@ public class GraphCleaningFunctionsTest {
|
|||
s -> "0102 computer and information sciences".equals(s.getValue()) &
|
||||
ModelConstants.DNET_SUBJECT_FOS_CLASSID.equals(s.getQualifier().getClassid())));
|
||||
|
||||
List<Subject> s1 = p_cleaned
|
||||
.getSubject()
|
||||
.stream()
|
||||
.filter(s -> s.getValue().equals("In Situ Hybridization"))
|
||||
.collect(Collectors.toList());
|
||||
assertNotNull(s1);
|
||||
assertEquals(1, s1.size());
|
||||
assertEquals(ModelConstants.DNET_SUBJECT_KEYWORD, s1.get(0).getQualifier().getClassid());
|
||||
assertEquals(ModelConstants.DNET_SUBJECT_KEYWORD, s1.get(0).getQualifier().getClassname());
|
||||
|
||||
// TODO add more assertions to verify the cleaned values
|
||||
System.out.println(MAPPER.writeValueAsString(p_cleaned));
|
||||
}
|
||||
|
|
|
@ -936,11 +936,23 @@ class MappersTest {
|
|||
System.out.println("***************");
|
||||
System.out.println(new ObjectMapper().writeValueAsString(list));
|
||||
System.out.println("***************");
|
||||
// final OtherResearchProduct p = (OtherResearchProduct) list.get(0);
|
||||
// assertValidId(p.getId());
|
||||
// assertValidId(p.getCollectedfrom().get(0).getKey());
|
||||
// System.out.println(p.getTitle().get(0).getValue());
|
||||
// assertTrue(StringUtils.isNotBlank(p.getTitle().get(0).getValue()));
|
||||
assertEquals(5, list.size());
|
||||
final OtherResearchProduct p = (OtherResearchProduct) list.get(0);
|
||||
assertValidId(p.getId());
|
||||
assertTrue(p.getId().startsWith("50|w3id"));
|
||||
assertValidId(p.getCollectedfrom().get(0).getKey());
|
||||
assertTrue(StringUtils.isNotBlank(p.getTitle().get(0).getValue()));
|
||||
assertEquals(1, p.getInstance().size());
|
||||
assertEquals("https://w3id.org/ro-id/0ab171a7-45c5-4194-82d4-850955504bca", p.getPid().get(0).getValue());
|
||||
Instance inst = p.getInstance().get(0);
|
||||
assertEquals("https://w3id.org/ro-id/0ab171a7-45c5-4194-82d4-850955504bca", inst.getPid().get(0).getValue());
|
||||
assertEquals("https://w3id.org/ro-id/0ab171a7-45c5-4194-82d4-850955504bca", inst.getUrl().get(0));
|
||||
assertEquals(1, p.getEoscifguidelines().size());
|
||||
assertEquals("EOSC::RO-crate", p.getEoscifguidelines().get(0).getCode());
|
||||
assertEquals("EOSC::RO-crate", p.getEoscifguidelines().get(0).getLabel());
|
||||
assertEquals("", p.getEoscifguidelines().get(0).getUrl());
|
||||
assertEquals("compliesWith", p.getEoscifguidelines().get(0).getSemanticRelation());
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
|
@ -988,6 +1000,17 @@ class MappersTest {
|
|||
|
||||
}
|
||||
|
||||
/**
 * Maps the {@code photic-zone-transformed.xml} test resource with {@code OdfToOafMapper}
 * and checks that the first mapped entity, cast to {@code OtherResearchProduct}, exposes a
 * non-null list of EOSC IF guidelines. The mapped entity is also printed as JSON for
 * manual inspection.
 *
 * @throws IOException if the test resource cannot be read or the entity cannot be serialized
 */
@Test
|
||||
void testEOSCFuture_ROHub() throws IOException {
|
||||
final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("photic-zone-transformed.xml")));
|
||||
final List<Oaf> list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml);
|
||||
// Only the first mapped element is inspected; the cast fails the test if it is not an OtherResearchProduct.
final OtherResearchProduct rocrate = (OtherResearchProduct) list.get(0);
|
||||
assertNotNull(rocrate.getEoscifguidelines());
|
||||
System.out.println("***************");
|
||||
System.out.println(new ObjectMapper().writeValueAsString(rocrate));
|
||||
System.out.println("***************");
|
||||
}
|
||||
|
||||
@Test
|
||||
void testNotWellFormed() throws IOException {
|
||||
final String xml = IOUtils
|
||||
|
|
|
@ -706,6 +706,28 @@
|
|||
"source": [
|
||||
],
|
||||
"subject": [
|
||||
{
|
||||
"dataInfo": {
|
||||
"provenanceaction": {
|
||||
"classid": "sysimport:crosswalk:repository",
|
||||
"classname": "sysimport:crosswalk:repository",
|
||||
"schemeid": "dnet:provenanceActions",
|
||||
"schemename": "dnet:provenanceActions"
|
||||
},
|
||||
"deletedbyinference": false,
|
||||
"inferred": false,
|
||||
"inferenceprovenance": "",
|
||||
"invisible": false,
|
||||
"trust": "0.9"
|
||||
},
|
||||
"qualifier": {
|
||||
"classid": "FOS",
|
||||
"classname": "Fields of Science and Technology classification",
|
||||
"schemeid": "dnet:result_subject",
|
||||
"schemename": "dnet:result_subject"
|
||||
},
|
||||
"value": "In Situ Hybridization"
|
||||
},
|
||||
{
|
||||
"dataInfo": {
|
||||
"deletedbyinference": false,
|
||||
|
|
|
@ -0,0 +1,108 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<record xmlns:oaire="http://namespace.openaire.eu/schema/oaire/"
|
||||
xmlns:datacite="http://datacite.org/schema/kernel-4"
|
||||
xmlns:dr="http://www.driver-repository.eu/namespace/dr"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xmlns:oaf="http://namespace.openaire.eu/oaf"
|
||||
xmlns:oai="http://www.openarchives.org/OAI/2.0/"
|
||||
xmlns:dri="http://www.driver-repository.eu/namespace/dri"
|
||||
xmlns:xs="http://www.w3.org/2001/XMLSchema"
|
||||
xmlns:dc="http://purl.org/dc/elements/1.1/">
|
||||
<header xmlns="http://www.openarchives.org/OAI/2.0/">
|
||||
<dri:objIdentifier>fsh_____4119::68126da991bd76d8be494bddfbf7a1bb</dri:objIdentifier>
|
||||
<dri:recordIdentifier>https://w3id.org/ro-id/28499bdf-a0c6-46aa-a96f-50bd9490b8be</dri:recordIdentifier>
|
||||
<dri:dateOfCollection/>
|
||||
<dri:mdFormat/>
|
||||
<dri:mdFormatInterpretation/>
|
||||
<dri:repositoryId/>
|
||||
<dr:objectIdentifier/>
|
||||
<dr:dateOfCollection>2022-11-15T12:29:19Z</dr:dateOfCollection>
|
||||
<dr:dateOfTransformation>2022-11-15T12:29:19Z</dr:dateOfTransformation>
|
||||
<oaf:datasourceprefix>fsh_____4119</oaf:datasourceprefix>
|
||||
<identifier>https://w3id.org/ro-id/28499bdf-a0c6-46aa-a96f-50bd9490b8be</identifier>
|
||||
<datestamp>2022-11-15T12:29:19Z</datestamp>
|
||||
<setSpec>rohub_data</setSpec>
|
||||
<setSpec>ro-crate_data</setSpec>
|
||||
</header>
|
||||
<metadata>
|
||||
<datacite:resource>
|
||||
<datacite:identifier identifierType="w3id">https://w3id.org/ro-id/28499bdf-a0c6-46aa-a96f-50bd9490b8be</datacite:identifier>
|
||||
<datacite:alternateIdentifiers>
|
||||
<datacite:alternateIdentifier alternateIdentifierType="w3id">https://w3id.org/ro-id/28499bdf-a0c6-46aa-a96f-50bd9490b8be</datacite:alternateIdentifier>
|
||||
</datacite:alternateIdentifiers>
|
||||
<datacite:relatedIdentifiers>
|
||||
<datacite:relatedIdentifier relatedIdentifierType="w3id" relationType="HasPart">https://w3id.org/ro-id/28499bdf-a0c6-46aa-a96f-50bd9490b8be/resources/b1b617b2-6b79-4bae-9fa6-b76945645626</datacite:relatedIdentifier>
|
||||
<datacite:relatedIdentifier relatedIdentifierType="w3id" relationType="HasPart">https://w3id.org/ro-id/28499bdf-a0c6-46aa-a96f-50bd9490b8be/resources/78103994-30be-4875-bf89-5acd752b5c3d</datacite:relatedIdentifier>
|
||||
<datacite:relatedIdentifier relatedIdentifierType="w3id" relationType="HasPart">https://w3id.org/ro-id/28499bdf-a0c6-46aa-a96f-50bd9490b8be/resources/18fd1c70-249b-4c67-80ee-539f801a0da7</datacite:relatedIdentifier>
|
||||
<datacite:relatedIdentifier relatedIdentifierType="w3id" relationType="HasPart">https://w3id.org/ro-id/28499bdf-a0c6-46aa-a96f-50bd9490b8be/resources/32faa2eb-4cc8-401f-ac5c-bec2849b70e1</datacite:relatedIdentifier>
|
||||
<datacite:relatedIdentifier relatedIdentifierType="w3id" relationType="HasPart">https://w3id.org/ro-id/28499bdf-a0c6-46aa-a96f-50bd9490b8be/resources/4c253f5a-d427-40c2-9e9f-6063ae087239</datacite:relatedIdentifier>
|
||||
<datacite:relatedIdentifier relatedIdentifierType="w3id" relationType="HasPart">https://w3id.org/ro-id/28499bdf-a0c6-46aa-a96f-50bd9490b8be/resources/371b1957-078c-472b-a195-af7bce152c10</datacite:relatedIdentifier>
|
||||
<datacite:relatedIdentifier relatedIdentifierType="w3id" relationType="HasPart">https://w3id.org/ro-id/28499bdf-a0c6-46aa-a96f-50bd9490b8be/resources/82f9e4b8-01b4-4e50-9e27-ec9d337c8d74</datacite:relatedIdentifier>
|
||||
</datacite:relatedIdentifiers>
|
||||
<datacite:resourceType xs:anyURI="http://purl.org/coar/resource_type/c_1843">RO-crate</datacite:resourceType>
|
||||
<datacite:rightsList>
|
||||
<datacite:rights rightsURI="https://creativecommons.org/licenses/by/4.0/legalcode">Creative Commons Attribution 4.0 International</datacite:rights>
|
||||
<datacite:rights rightsURI="http://purl.org/coar/access_right/c_abf2">open access</datacite:rights>
|
||||
</datacite:rightsList>
|
||||
<datacite:titles>
|
||||
<datacite:title>Mapping the photic zone of the Mediterranean Sea</datacite:title>
|
||||
</datacite:titles>
|
||||
<datacite:descriptions>
|
||||
<datacite:description descriptionType="Abstract">Estimating the penetration of light along the water column from satellite data to map the photic zone in the Mediterranean Sea</datacite:description>
|
||||
</datacite:descriptions>
|
||||
<datacite:publisher>CNR-ISMAR</datacite:publisher>
|
||||
<creators xmlns="http://datacite.org/schema/kernel-4">
|
||||
<creator>
|
||||
<creatorName>Giorgio Castellan</creatorName>
|
||||
</creator>
|
||||
<creator>
|
||||
<creatorName>Lorenzo Angeletti</creatorName>
|
||||
</creator>
|
||||
<creator>
|
||||
<creatorName>Paolo Montagna</creatorName>
|
||||
</creator>
|
||||
<creator>
|
||||
<creatorName>Marco Taviani</creatorName>
|
||||
</creator>
|
||||
</creators>
|
||||
<dates xmlns="http://datacite.org/schema/kernel-4">
|
||||
<date dateType="Issued">2022-11-14T16:32:45Z</date>
|
||||
</dates>
|
||||
<dc:descriptions>
|
||||
<dc:description descriptionType="Abstract">Estimating the penetration of light along the water column from satellite data to map the photic zone in the Mediterranean Sea</dc:description>
|
||||
</dc:descriptions>
|
||||
<dc:publicationYear>2022</dc:publicationYear>
|
||||
<rightsList xmlns="http://datacite.org/schema/kernel-4">
|
||||
<rights rightsURI="http://purl.org/coar/access_right/c_abf2">open access</rights>
|
||||
</rightsList>
|
||||
<sizes xmlns="http://datacite.org/schema/kernel-4">
|
||||
<size>813.478 KB</size>
|
||||
</sizes>
|
||||
<subjects xmlns="http://datacite.org/schema/kernel-4">
|
||||
<subject>Earth sciences</subject>
|
||||
<subject>Ecology</subject>
|
||||
<subject>Optics</subject>
|
||||
</subjects>
|
||||
</datacite:resource>
|
||||
<oaf:identifier identifierType="w3id">https://w3id.org/ro-id/28499bdf-a0c6-46aa-a96f-50bd9490b8be</oaf:identifier>
|
||||
<dr:CobjCategory type="other">0048</dr:CobjCategory>
|
||||
<oaf:dateAccepted>2022-11-14</oaf:dateAccepted>
|
||||
<oaf:accessrights>OPEN</oaf:accessrights>
|
||||
<oaf:license>https://creativecommons.org/licenses/by/4.0/legalcode</oaf:license>
|
||||
<oaf:language/>
|
||||
<oaf:hostedBy name="ROHub" id="fairsharing_::4119"/>
|
||||
<oaf:collectedFrom name="ROHub" id="fairsharing_::4119"/>
|
||||
<oaf:eoscifguidelines code="EOSC::RO-crate"
|
||||
label="EOSC::RO-crate"
|
||||
url=""
|
||||
semanticrelation="compliesWith"/>
|
||||
<oaf:eoscifguidelines code="EOSC::Jupyter Notebook"
|
||||
label="EOSC::Jupyter Notebook"
|
||||
url=""
|
||||
semanticrelation="compliesWith"/>
|
||||
<oaf:eoscifguidelines code="EOSC::Data Cube"
|
||||
label="EOSC::Data Cube"
|
||||
url=""
|
||||
semanticrelation="compliesWith"/>
|
||||
</metadata>
|
||||
</record>
|
|
@ -21,15 +21,13 @@
|
|||
</header>
|
||||
<metadata>
|
||||
<datacite:resource>
|
||||
<datacite:identifier identifierType="URL">https://w3id.org/ro-id/0ab171a7-45c5-4194-82d4-850955504bca</datacite:identifier>
|
||||
<datacite:alternateIdentifiers/>
|
||||
<datacite:identifier identifierType="w3id">https://w3id.org/ro-id/0ab171a7-45c5-4194-82d4-850955504bca</datacite:identifier>
|
||||
<datacite:alternateIdentifiers>
|
||||
<datacite:alternateIdentifier alternateIdentifierType="w3id">https://w3id.org/ro-id/0ab171a7-45c5-4194-82d4-850955504bca</datacite:alternateIdentifier>
|
||||
</datacite:alternateIdentifiers>
|
||||
<datacite:relatedIdentifiers>
|
||||
<datacite:relatedIdentifier relatedIdentifierType="" relationType="">
|
||||
https://w3id.org/ro-id/0ab171a7-45c5-4194-82d4-850955504bca/resources/24fae96f-f986-46e1-bfd0-a21ca20ff0ce
|
||||
</datacite:relatedIdentifier>
|
||||
<datacite:relatedIdentifier relatedIdentifierType="" relationType="">
|
||||
https://w3id.org/ro-id/0ab171a7-45c5-4194-82d4-850955504bca/resources/6d3427a8-352e-49f4-9796-f618c44dc16d
|
||||
</datacite:relatedIdentifier>
|
||||
<datacite:relatedIdentifier relatedIdentifierType="w3id" relationType="HasPart">https://w3id.org/ro-id/0ab171a7-45c5-4194-82d4-850955504bca/resources/24fae96f-f986-46e1-bfd0-a21ca20ff0ce</datacite:relatedIdentifier>
|
||||
<datacite:relatedIdentifier relatedIdentifierType="w3id" relationType="HasPart">https://w3id.org/ro-id/0ab171a7-45c5-4194-82d4-850955504bca/resources/6d3427a8-352e-49f4-9796-f618c44dc16d</datacite:relatedIdentifier>
|
||||
</datacite:relatedIdentifiers>
|
||||
<datacite:resourceType xs:anyURI="http://purl.org/coar/resource_type/c_1843">RO-crate</datacite:resourceType>
|
||||
<datacite:rightsList>
|
||||
|
@ -43,21 +41,17 @@
|
|||
</datacite:descriptions>
|
||||
<datacite:publisher>Poznań Supercomputing and Networking Center</datacite:publisher>
|
||||
<contributors xmlns="http://datacite.org/schema/kernel-4">
|
||||
<contributor>
|
||||
<contributor contributorType="Researcher">
|
||||
<contributorName>Generation Service</contributorName>
|
||||
</contributor>
|
||||
</contributor>
|
||||
</contributors>
|
||||
<creators xmlns="http://datacite.org/schema/kernel-4">
|
||||
<creator>
|
||||
<creator>
|
||||
<creatorName>CNR-ISMAR</creatorName>
|
||||
</creator>
|
||||
</creator>
|
||||
</creators>
|
||||
<dates xmlns="http://datacite.org/schema/kernel-4">
|
||||
<date dateType="Created">2018-06-20T11:21:46Z</date>
|
||||
<date dateType="Issued">2018-06-20T11:21:46Z</date>
|
||||
</dates>
|
||||
<dc:descriptions>
|
||||
<dc:description descriptionType="Abstract">The use of biological effects tools offer enormous potential to meet the challenges outlined by the European Union Marine Strategy Framework Directive (MSFD) whereby Member States are required to develop a robust set of tools for defining 11 qualitative descriptors of Good Environmental Status (GES), such as demonstrating that "Concentrations of contaminants are at levels not giving rise to pollution effects" (GES Descriptor 8). This paper discusses the combined approach of monitoring chemical contaminant levels, along side biological effect measurements relating to the effect of pollutants, for undertaking assessments of GES across European marine regions. We outline the minimum standards that biological effects tools should meet if they are to be used for defining GES in relation to Descriptor 8 and describe the current international initiatives underway to develop assessment criteria for these biological effects techniques. Crown Copyright (C) 2010 Published by Elsevier Ltd. All rights reserved.</dc:description>
|
||||
|
@ -71,15 +65,18 @@
|
|||
</sizes>
|
||||
<subjects xmlns="http://datacite.org/schema/kernel-4">
|
||||
<subject>Ecology</subject>
|
||||
<subject>EOSC::RO-crate</subject>
|
||||
</subjects>
|
||||
</datacite:resource>
|
||||
<oaf:identifier identifierType="URL">https://w3id.org/ro-id/0ab171a7-45c5-4194-82d4-850955504bca</oaf:identifier>
|
||||
<oaf:identifier identifierType="w3id">https://w3id.org/ro-id/0ab171a7-45c5-4194-82d4-850955504bca</oaf:identifier>
|
||||
<dr:CobjCategory type="other research product">other research product</dr:CobjCategory>
|
||||
<oaf:dateAccepted/>
|
||||
<oaf:dateAccepted>2018-06-20</oaf:dateAccepted>
|
||||
<oaf:accessrights>OPEN</oaf:accessrights>
|
||||
<oaf:language/>
|
||||
<oaf:hostedBy name="ROHub" id="fairsharing_::4119"/>
|
||||
<oaf:collectedFrom name="ROHub" id="fairsharing_::4119"/>
|
||||
<oaf:eoscifguidelines code="EOSC::RO-crate"
|
||||
label="EOSC::RO-crate"
|
||||
url=""
|
||||
semanticrelation="compliesWith"/>
|
||||
</metadata>
|
||||
</record>
|
|
@ -0,0 +1,88 @@
|
|||
package eu.dnetlib.dhp.oa.provision;
|
||||
|
||||
import com.fasterxml.jackson.databind.DeserializationFeature;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import eu.dnetlib.dhp.oa.provision.model.JoinedEntity;
|
||||
import eu.dnetlib.dhp.oa.provision.utils.ContextMapper;
|
||||
import eu.dnetlib.dhp.oa.provision.utils.StreamingInputDocumentFactory;
|
||||
import eu.dnetlib.dhp.oa.provision.utils.XmlRecordFactory;
|
||||
import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct;
|
||||
import eu.dnetlib.dhp.utils.saxon.SaxonTransformerFactory;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.solr.client.solrj.util.ClientUtils;
|
||||
import org.apache.solr.common.SolrInputDocument;
|
||||
import org.dom4j.Document;
|
||||
import org.dom4j.DocumentException;
|
||||
import org.dom4j.io.SAXReader;
|
||||
import org.junit.jupiter.api.Assertions;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import javax.xml.transform.Transformer;
|
||||
import javax.xml.transform.TransformerException;
|
||||
import java.io.IOException;
|
||||
import java.io.StringReader;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertNotNull;
|
||||
|
||||
/**
 * Test for the provision pipeline on an EOSC Future record: a JSON-serialized
 * {@code OtherResearchProduct} is turned into an XML record, which is then transformed
 * into an index record and parsed into a {@code SolrInputDocument}.
 */
public class EOSCFuture_Test {
|
||||
|
||||
// Mapper used to read the JSON fixture; unknown JSON properties are ignored instead of failing.
public static ObjectMapper OBJECT_MAPPER = new ObjectMapper()
|
||||
.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
|
||||
|
||||
// Fixed values handed to the StreamingInputDocumentFactory constructor in testRecordTransformation.
public static final String VERSION = "2021-04-15T10:05:53Z";
|
||||
public static final String DSID = "b9ee796a-c49f-4473-a708-e7d67b84c16d_SW5kZXhEU1Jlc291cmNlcy9JbmRleERTUmVzb3VyY2VUeXBl";
|
||||
|
||||
// NOTE(review): assigned in setUp() but shadowed by a local variable in testEOSC_ROHub(),
// so this field is never read within this class.
private ContextMapper contextMapper;
|
||||
|
||||
/** Creates a fresh {@code ContextMapper} before each test. */
@BeforeEach
|
||||
public void setUp() {
|
||||
contextMapper = new ContextMapper();
|
||||
}
|
||||
|
||||
|
||||
/**
 * Reads {@code eosc-future/photic-zone.json} as an {@code OtherResearchProduct}, builds its
 * XML record with {@code XmlRecordFactory}, checks that the record is non-null and can be
 * parsed by {@code SAXReader}, then runs it through {@link #testRecordTransformation(String)}.
 *
 * @throws IOException if a test resource cannot be read
 * @throws DocumentException if the generated XML cannot be parsed
 * @throws TransformerException if the index record transformation fails
 */
@Test
|
||||
public void testEOSC_ROHub() throws IOException, DocumentException, TransformerException {
|
||||
|
||||
// Local instance; it shadows the field initialised in setUp().
final ContextMapper contextMapper = new ContextMapper();
|
||||
|
||||
final XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false,
|
||||
XmlConverterJob.schemaLocation);
|
||||
|
||||
// NOTE(review): getResourceAsStream returns null when the resource is missing; no null check is done here.
final OtherResearchProduct p = OBJECT_MAPPER
|
||||
.readValue(IOUtils.toString(getClass().getResourceAsStream("eosc-future/photic-zone.json")), OtherResearchProduct.class);
|
||||
|
||||
final String xml = xmlRecordFactory.build(new JoinedEntity<>(p));
|
||||
|
||||
assertNotNull(xml);
|
||||
|
||||
// Parsing the record throws DocumentException if the XML is not well formed.
final Document doc = new SAXReader().read(new StringReader(xml));
|
||||
|
||||
assertNotNull(doc);
|
||||
System.out.println(doc.asXML());
|
||||
|
||||
|
||||
testRecordTransformation(xml);
|
||||
}
|
||||
|
||||
|
||||
/**
 * Transforms the given XML record into an index record using the layout transformer built
 * from the {@code fields.xml} and {@code layoutToRecordTransformer.xsl} test resources,
 * parses the result into a {@code SolrInputDocument} and checks that its XML serialization
 * is non-null. The serialized document is printed for manual inspection.
 *
 * @param record the XML record to transform
 * @throws IOException if a test resource cannot be read
 * @throws TransformerException if the transformation fails
 */
private void testRecordTransformation(final String record) throws IOException, TransformerException {
|
||||
final String fields = IOUtils.toString(getClass().getResourceAsStream("fields.xml"));
|
||||
final String xslt = IOUtils.toString(getClass().getResourceAsStream("layoutToRecordTransformer.xsl"));
|
||||
|
||||
final String transformer = XmlIndexingJob.getLayoutTransformer("DMF", fields, xslt);
|
||||
|
||||
final Transformer tr = SaxonTransformerFactory.newInstance(transformer);
|
||||
|
||||
final String indexRecordXML = XmlIndexingJob.toIndexRecord(tr, record);
|
||||
|
||||
final SolrInputDocument solrDoc = new StreamingInputDocumentFactory(VERSION, DSID)
|
||||
.parseDocument(indexRecordXML);
|
||||
|
||||
final String xmlDoc = ClientUtils.toXML(solrDoc);
|
||||
|
||||
Assertions.assertNotNull(xmlDoc);
|
||||
System.out.println(xmlDoc);
|
||||
}
|
||||
|
||||
}
|
|
@ -128,6 +128,20 @@ public class IndexRecordTransformerTest {
|
|||
testRecordTransformation(record);
|
||||
}
|
||||
|
||||
/**
 * Loads the {@code eosc-future/software-justthink.xml} test resource and passes it to
 * {@code testRecordTransformation}.
 *
 * @throws IOException if the test resource cannot be read
 * @throws TransformerException if thrown by {@code testRecordTransformation}
 */
@Test
|
||||
public void testForEOSCFutureSoftwareNotebook() throws IOException, TransformerException {
|
||||
final String record = IOUtils
|
||||
.toString(getClass().getResourceAsStream("eosc-future/software-justthink.xml"));
|
||||
testRecordTransformation(record);
|
||||
}
|
||||
|
||||
/**
 * Loads the {@code eosc-future/software-justthink-claim.xml} test resource and passes it to
 * {@code testRecordTransformation}.
 *
 * @throws IOException if the test resource cannot be read
 * @throws TransformerException if thrown by {@code testRecordTransformation}
 */
@Test
|
||||
public void testForEOSCFutureSoftwareNotebookClaim() throws IOException, TransformerException {
|
||||
final String record = IOUtils
|
||||
.toString(getClass().getResourceAsStream("eosc-future/software-justthink-claim.xml"));
|
||||
testRecordTransformation(record);
|
||||
}
|
||||
|
||||
@Test
|
||||
void testDoiUrlNormalization() throws MalformedURLException {
|
||||
|
||||
|
|
File diff suppressed because one or more lines are too long
|
@ -0,0 +1,305 @@
|
|||
<record>
|
||||
<result xmlns:dri="http://www.driver-repository.eu/namespace/dri">
|
||||
<header xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
|
||||
<dri:objIdentifier>od______2659::3801993ea8f970cfc991277160edf277</dri:objIdentifier>
|
||||
<dri:dateOfCollection>2022-08-08T03:06:13Z</dri:dateOfCollection>
|
||||
<status>under curation</status>
|
||||
<counters/>
|
||||
</header>
|
||||
<metadata>
|
||||
<oaf:entity xmlns:oaf="http://namespace.openaire.eu/oaf"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://namespace.openaire.eu/oaf https://www.openaire.eu/schema/1.0/oaf-1.0.xsd">
|
||||
<oaf:result>
|
||||
<title classid="main title" classname="main title"
|
||||
schemeid="dnet:dataCite_title" schemename="dnet:dataCite_title">JUSThink
|
||||
Alignment Analysis</title>
|
||||
<creator rank="1" name="" surname="">Norman, Utku</creator>
|
||||
<creator rank="2" name="" surname="">Dinkar, Tanvi</creator>
|
||||
<creator rank="3" name="" surname="">Bruno, Barbara</creator>
|
||||
<creator rank="4" name="" surname="">Clavel, Chloé</creator>
|
||||
<dateofacceptance/>
|
||||
<resulttype classid="software" classname="software"
|
||||
schemeid="dnet:result_typologies" schemename="dnet:result_typologies"/>
|
||||
<language classid="eng" classname="English" schemeid="dnet:languages"
|
||||
schemename="dnet:languages"/>
|
||||
<description>
|
||||
<p>
|
||||
<strong>1. Description</strong>
|
||||
</p>
|
||||
<p>This repository contains<strong> tools to automatically analyse how
|
||||
participants align their use of task-specific referents in their
|
||||
dialogue and actions for a collaborative learning activity, and how
|
||||
it relates to the task success</strong> (i.e. their learning
|
||||
outcomes and task performance).</p>
|
||||
<p>As a use case, it processes data from a collaborative problem solving
|
||||
activity named JUSThink <a
|
||||
href="https://zenodo.org/record/4675070#references">[1, 2]</a>, i.e.
|
||||
JUSThink Dialogue and Actions Corpus data set that is available from the
|
||||
Zenodo Repository, DOI: <a href="http://doi.org/10.5281/zenodo.4627104"
|
||||
>10.5281/zenodo.4627104</a>, and reproduces the results and figures
|
||||
in <a href="https://zenodo.org/record/4675070#references">[3]</a>.</p>
|
||||
<p>In brief: </p>
|
||||
<ol>
|
||||
<li><strong>JUSThink Dialogue and Actions Corpus</strong> contains
|
||||
transcripts, event logs, and test responses of children aged 9
|
||||
through 12, as they participate in the JUSThink activity <a
|
||||
href="https://zenodo.org/record/4675070#references">[1, 2]</a>
|
||||
in pairs of two, to solve a problem on graphs together. </li>
|
||||
<li><strong>The JUSThink activity and its study</strong> is first
|
||||
described in <a href="https://zenodo.org/record/4675070#references"
|
||||
>[1]</a>, and elaborated with findings concerning the link
|
||||
between children's learning, performance in the activity, and
|
||||
perception of self, the other and the robot in <a
|
||||
href="https://zenodo.org/record/4675070#references">[2]</a>. </li>
|
||||
<li><strong>Alignment analysis in our work <a
|
||||
href="https://zenodo.org/record/4675070#references"
|
||||
>[3]</a></strong> studies the participants' use of
|
||||
expressions that are related to the task at hand, their follow up
|
||||
actions of these expressions, and how it links to task success.</li>
|
||||
</ol>
|
||||
<p>
|
||||
<strong>2. Publications</strong>
|
||||
</p>
|
||||
<p>If you use this work in an academic context, please cite the following
|
||||
publications:</p>
|
||||
<ul>
|
||||
<li>
|
||||
<p>Norman*, U., Dinkar*, T., Bruno, B., & Clavel, C. (2022).
|
||||
Studying Alignment in a Collaborative Learning Activity via
|
||||
Automatic Methods: The Link Between What We Say and Do. Dialogue
|
||||
& Discourse, 13(2), 1 - ;48. *Contributed equally to this
|
||||
work. <a href="https://doi.org/10.5210/dad.2022.201"
|
||||
>https://doi.org/10.5210/dad.2022.201</a></p>
|
||||
</li>
|
||||
<li>
|
||||
<p>Norman, U., Dinkar, T., Bruno, B., & Clavel, C. (2021).
|
||||
JUSThink Alignment Analysis. In Dialogue & Discourse
|
||||
(v1.0.0, Vol. 13, Number 2, pp. 1 - ;48). Zenodo. <a
|
||||
href="https://doi.org/10.5281/zenodo.4675070"
|
||||
>https://doi.org/10.5281/zenodo.4675070</a></p>
|
||||
</li>
|
||||
</ul>
|
||||
<p>
|
||||
<strong>3. Content</strong>
|
||||
</p>
|
||||
<p>The tools provided in this repository consists of 7 Jupyter Notebooks
|
||||
written in Python 3, and two additional external tools utilised by the
|
||||
notebooks.</p>
|
||||
<p>
|
||||
<strong>3.1. Jupyter Notebooks</strong>
|
||||
</p>
|
||||
<p>We highlight that the notebooks up until the last (i.e. to test the
|
||||
hypotheses (tools/7_test_the_hypotheses.ipynb)) present a general
|
||||
pipeline to process event logs, test responses and transcripts to
|
||||
extract measures of task performance, learning outcomes, and measures of
|
||||
alignment.</p>
|
||||
<ol>
|
||||
<li><strong>Extract task performance (and other features) from the logs
|
||||
</strong>(tools/1_extract_performance_and_other_features_from_logs.ipynb):
|
||||
Extracts various measures of task behaviour from the logs, at
|
||||
varying granularities of the activity (i.e. the whole corpus, task,
|
||||
attempt, and turn levels). In later notebooks, we focus on one of
|
||||
the features to estimate the task performance of a team: (minimum)
|
||||
error.</li>
|
||||
<li><strong>Extract learning outcomes from the test responses</strong>
|
||||
(tools/2_extract_learning_gain_from_test_responses.ipynb): Extracts
|
||||
measures of learning outcomes from the responses to the pre-test and
|
||||
the post-test. In later notebooks, we focus on one of the features
|
||||
to estimate the learning outcome of a team: relative learning gain
|
||||
<a href="https://sandbox.zenodo.org/record/742549#references"
|
||||
>[4]</a></li>
|
||||
<li><strong>Select and visualise a subset of teams for
|
||||
transcription</strong>
|
||||
(tools/3_visualise_transcribed_teams.ipynb): Visualises the
|
||||
transcribed teams among the other teams in the feature space spanned
|
||||
by task performance and learning outcome, as well as the
|
||||
distribution of their number of attempts and turns.</li>
|
||||
<li><strong>Extract routines from transcripts</strong>
|
||||
(tools/4_extract_routines_from_transcripts.ipynb) (uses <a
|
||||
href="https://github.com/GuillaumeDD/dialign">dialign</a> to
|
||||
extract routines): Extracts routines of referring expressions that
|
||||
are "fixed", i.e. become shared or established amongst
|
||||
interlocutors.</li>
|
||||
<li><strong>Combine transcripts with logs</strong>
|
||||
(tools/5_construct_the_corpus_by_combining_transcripts_with_logs.ipynb):
|
||||
Merges transcripts with event logs to have a combined dialogue and
|
||||
actions corpus, to be processed e.g. to detect follow-up
|
||||
actions.</li>
|
||||
<li><strong>Recognise instructions and detect follow-up actions</strong>
|
||||
(tools/6_recognise_instructions_detect_follow-up_actions.ipynb):
|
||||
Extracts verbalised instruction such as "connect Mount Basel to
|
||||
Montreux", and pairs them with the follow-up action that may
|
||||
<em>match</em> (e.g. if the other connects Basel to Montreux) or
|
||||
<em>mismatch</em> (e.g. if the other connects Basel to
|
||||
Neuchatel) with the instruction.</li>
|
||||
<li><strong>Test the hypotheses </strong>in <a
|
||||
href="https://sandbox.zenodo.org/record/742549#references"
|
||||
>[3]</a> (tools/7_test_the_hypotheses.ipynb) (uses
|
||||
<strong>effsize</strong> to estimate effect size, specifically
|
||||
Cliff's Delta): Considers each research questions and hypotheses
|
||||
studied in <a
|
||||
href="https://sandbox.zenodo.org/record/742549#references"
|
||||
>[3]</a> and generates the results in <a
|
||||
href="https://sandbox.zenodo.org/record/742549#references"
|
||||
>[3]</a>.</li>
|
||||
</ol>
|
||||
<p>
|
||||
<strong>3.2. External Tools</strong>
|
||||
</p>
|
||||
<ol>
|
||||
<li><strong><a href="https://github.com/GuillaumeDD/dialign">dialign</a>
|
||||
tool</strong> to extract routines, specifically <a
|
||||
href="https://github.com/GuillaumeDD/dialign/releases/tag/v1.0"
|
||||
>Release 1.0</a> from <a
|
||||
href="https://github.com/GuillaumeDD/dialign/releases/download/v1.0/dialign-1.0.zip"
|
||||
>dialign-1.0.zip</a>:\n It extracts routine expressions that are
|
||||
"shared" among the participants from transcripts. \n It is
|
||||
used as an external module (in accordance with its CeCILL-B License,
|
||||
see <strong>License</strong>).</li>
|
||||
<li><strong>effsize tool</strong> to compute estimators of effect
|
||||
size.\n We specifically use it to compute Cliff's Delta, which
|
||||
quantifies the amount difference between two groups of observations,
|
||||
by computing the Cliff's Delta statistic.\n It is taken from
|
||||
project <a
|
||||
href="https://acclab.github.io/DABEST-python-docs/index.html"
|
||||
>DABEST</a> (see <strong>License</strong>).</li>
|
||||
</ol>
|
||||
<p>
|
||||
<strong>4. Research Questions and Hypotheses in <a
|
||||
href="https://sandbox.zenodo.org/record/742549#references"
|
||||
>[3]</a></strong>
|
||||
</p>
|
||||
<ul>
|
||||
<li><strong>RQ1 Lexical alignment</strong>: How do the interlocutors
|
||||
<em>use</em> expressions related to the task? Is this associated
|
||||
with task success? <ul>
|
||||
<li><strong>H1.1</strong>: Task-specific referents become
|
||||
routine early for more successful teams.</li>
|
||||
<li><strong>H1.2</strong>: Hesitation phenomena are more likely
|
||||
to occur in the vicinity of priming and establishment of
|
||||
task-specific referents for more successful teams.</li>
|
||||
</ul>
|
||||
</li>
|
||||
<li><strong>RQ2 Behavioural alignment</strong>: How do the interlocutors
|
||||
<em>follow up</em> these expressions with actions? Is this
|
||||
associated with task success? <ul>
|
||||
<li><strong>H2.1</strong>: Instructions are more likely to be
|
||||
followed by a corresponding action early in the dialogue for
|
||||
more successful teams.</li>
|
||||
<li><strong>H2.2</strong>: When instructions are followed by a
|
||||
corresponding or a different action, the action is more
|
||||
likely to be in the vicinity of information management
|
||||
phenomena for more successful teams.</li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
<p>The RQs and Hs are addressed in the notebook for testing the hypotheses
|
||||
(i.e. tools/7_test_the_hypotheses.ipynb).</p>
|
||||
<p>
|
||||
<strong>Acknowledgements</strong>
|
||||
</p>
|
||||
<p>This project has received funding from the European Union's Horizon
|
||||
2020 research and innovation programme under grant agreement No 765955.
|
||||
Namely, the <a href="https://www.animatas.eu/">ANIMATAS Project</a>.</p>
|
||||
<p>
|
||||
<strong>License</strong>
|
||||
</p>
|
||||
<p>The whole package is under MIT License, see the <strong>LICENSE</strong>
|
||||
file.</p>
|
||||
<p>Classes under the <strong>tools/effsize</strong> package were taken from
|
||||
project <a href="https://acclab.github.io/DABEST-python-docs/index.html"
|
||||
><strong>DABEST</strong></a>, Copyright 2016-2020 Joses W. Ho.
|
||||
These classes are licensed under the BSD 3-Clause Clear License. See
|
||||
<strong>tools/effsize/LICENSE</strong> file for additional
|
||||
details.</p>
|
||||
<p>Classes under the <strong>tools/dialign-1.0</strong> package were taken
|
||||
from project <strong><a href="https://github.com/GuillaumeDD/dialign"
|
||||
>dialign</a></strong>. These classes are licensed under the
|
||||
CeCILL-B License. This package is used as an "external
|
||||
module", see<strong> tools/dialign-1.0/LICENSE.txt</strong> for
|
||||
additional details.</p>
|
||||
</description>
|
||||
<country classid="" classname="" schemeid="" schemename=""/>
|
||||
<subject classid="" classname="" schemeid="" schemename=""/>
|
||||
<relevantdate classid="" classname="" schemeid="" schemename=""/>
|
||||
<publisher>Zenodo</publisher>
|
||||
<embargoenddate/>
|
||||
<journal issn="" eissn="" lissn="" ep="" iss="" sp="" vol=""/>
|
||||
<source/>
|
||||
<fulltext/>
|
||||
<format/>
|
||||
<storagedate/>
|
||||
<resourcetype classid="" classname="" schemeid="" schemename=""/>
|
||||
<device/>
|
||||
<size/>
|
||||
<version/>
|
||||
<lastmetadataupdate/>
|
||||
<metadataversionnumber/>
|
||||
<documentationUrl/>
|
||||
<codeRepositoryUrl/>
|
||||
<programmingLanguage classid="" classname="" schemeid="" schemename=""/>
|
||||
<contactperson/>
|
||||
<contactgroup/>
|
||||
<tool/>
|
||||
<originalId>oai:zenodo.org:4675070</originalId>
|
||||
<collectedfrom name="ZENODO" id="opendoar____::358aee4cc897452c00244351e4d91f69"/>
|
||||
<pid classid="oai" classname="Open Archives Initiative"
|
||||
schemeid="dnet:pid_types" schemename="dnet:pid_types"
|
||||
>oai:zenodo.org:4675070</pid>
|
||||
<pid classid="doi" classname="Digital Object Identifier"
|
||||
schemeid="dnet:pid_types" schemename="dnet:pid_types"
|
||||
>10.5281/zenodo.4675070</pid>
|
||||
<bestaccessright classid="OPEN" classname="Open Access"
|
||||
schemeid="dnet:access_modes" schemename="dnet:access_modes"/>
|
||||
<eoscifguidelines code="EOSC::Jupyter Notebook" label="EOSC::Jupyter Notebook"
|
||||
url="" semanticrelation="compliesWith"/>
|
||||
<datainfo>
|
||||
<inferred>false</inferred>
|
||||
<deletedbyinference>false</deletedbyinference>
|
||||
<trust>0.9</trust>
|
||||
<inferenceprovenance/>
|
||||
<provenanceaction classid="user:insert" classname="user:insert"
|
||||
schemeid="dnet:provenanceActions" schemename="dnet:provenanceActions"/>
|
||||
</datainfo>
|
||||
<rels>
|
||||
<rel inferred="false" trust="0.9" inferenceprovenance=""
|
||||
provenanceaction="user:claim">
|
||||
<to class="isProducedBy" scheme="dnet:result_project_relations"
|
||||
type="project">corda__h2020::c4515ebef538a734cf11f795347f5dac</to>
|
||||
<code>765955</code>
|
||||
<acronym>ANIMATAS</acronym>
|
||||
<title>Advancing intuitive human-machine interaction with human-like
|
||||
social capabilities for education in schools</title>
|
||||
<contracttype classid="" classname="" schemeid="" schemename=""/>
|
||||
<funding>
|
||||
<funder id="ec__________::EC" shortname="EC"
|
||||
name="European Commission" jurisdiction=""/>
|
||||
<funding_level_0 name="H2020"
|
||||
>ec__________::EC::H2020</funding_level_0>
|
||||
</funding>
|
||||
<websiteurl/>
|
||||
</rel>
|
||||
</rels>
|
||||
<children>
|
||||
<instance id="od______2659::3801993ea8f970cfc991277160edf277">
|
||||
<instancetype classid="0029" classname="Software"
|
||||
schemeid="dnet:publication_resource"
|
||||
schemename="dnet:publication_resource"/>
|
||||
<collectedfrom name="ZENODO"
|
||||
id="opendoar____::358aee4cc897452c00244351e4d91f69"/>
|
||||
<hostedby name="ZENODO"
|
||||
id="opendoar____::358aee4cc897452c00244351e4d91f69"/>
|
||||
<accessright classid="OPEN" classname="Open Access"
|
||||
schemeid="dnet:access_modes" schemename="dnet:access_modes"/>
|
||||
<dateofacceptance/>
|
||||
<webresource>
|
||||
<url>https://zenodo.org/record/4675070</url>
|
||||
</webresource>
|
||||
</instance>
|
||||
</children>
|
||||
</oaf:result>
|
||||
</oaf:entity>
|
||||
</metadata>
|
||||
</result>
|
||||
</record>
|
|
@ -0,0 +1,429 @@
|
|||
<record>
|
||||
<result xmlns:dri="http://www.driver-repository.eu/namespace/dri">
|
||||
<header xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
|
||||
<dri:objIdentifier>doi_dedup___::c054151b6a8c4f41c7acf160651a6503</dri:objIdentifier>
|
||||
<dri:dateOfCollection>2022-10-13T00:15:44+0000</dri:dateOfCollection>
|
||||
<dri:dateOfTransformation>2022-10-13T07:44:29.152Z</dri:dateOfTransformation>
|
||||
</header>
|
||||
<metadata>
|
||||
<oaf:entity xmlns:oaf="http://namespace.openaire.eu/oaf"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://namespace.openaire.eu/oaf https://www.openaire.eu/schema/1.0/oaf-1.0.xsd">
|
||||
<oaf:result>
|
||||
<collectedfrom name="ZENODO" id="opendoar____::358aee4cc897452c00244351e4d91f69"/>
|
||||
<collectedfrom name="Datacite" id="openaire____::9e3be59865b2c1c335d32dae2fe7b254"/>
|
||||
<originalId>oai:zenodo.org:4675070</originalId>
|
||||
<originalId>50|od______2659::3801993ea8f970cfc991277160edf277</originalId>
|
||||
<originalId>oai:zenodo.org:6974562</originalId>
|
||||
<originalId>50|od______2659::9c87ff4a5e7710052b873088e7265072</originalId>
|
||||
<originalId>10.5281/zenodo.4675069</originalId>
|
||||
<pid classid="doi" classname="Digital Object Identifier" schemeid="dnet:pid_types"
|
||||
schemename="dnet:pid_types" inferred="false"
|
||||
provenanceaction="sysimport:crosswalk:repository" trust="0.9"
|
||||
>10.5281/zenodo.4675070</pid>
|
||||
<pid classid="doi" classname="Digital Object Identifier" schemeid="dnet:pid_types"
|
||||
schemename="dnet:pid_types" inferred="false"
|
||||
provenanceaction="sysimport:crosswalk:repository" trust="0.9"
|
||||
>10.5281/zenodo.6974562</pid>
|
||||
<pid classid="doi" classname="Digital Object Identifier" schemeid="dnet:pid_types"
|
||||
schemename="dnet:pid_types" inferred="false"
|
||||
provenanceaction="sysimport:actionset" trust="0.9">10.5281/zenodo.4675069</pid>
|
||||
<measure id="influence" score="4.916186E-9" class="C5"/>
|
||||
<measure id="popularity" score="6.885733E-9" class="C5"/>
|
||||
<measure id="influence_alt" score="0" class="C5"/>
|
||||
<measure id="popularity_alt" score="0.0" class="C5"/>
|
||||
<measure id="impulse" score="0" class="C5"/>
|
||||
<title classid="main title" classname="main title" schemeid="dnet:dataCite_title"
|
||||
schemename="dnet:dataCite_title" inferred="false"
|
||||
provenanceaction="sysimport:crosswalk:repository" trust="0.9">JUSThink Alignment
|
||||
Analysis</title>
|
||||
<bestaccessright classid="OPEN" classname="Open Access" schemeid="dnet:access_modes"
|
||||
schemename="dnet:access_modes"/>
|
||||
<creator rank="1" name="Utku" surname="Norman" orcid_pending="0000-0002-6802-1444"
|
||||
>Norman, Utku</creator>
|
||||
<creator rank="2" name="Tanvi" surname="Dinkar">Dinkar, Tanvi</creator>
|
||||
<creator rank="3" name="Barbara" surname="Bruno" orcid_pending="0000-0003-0953-7173"
|
||||
>Bruno, Barbara</creator>
|
||||
<creator rank="4" name="Chloé" surname="Clavel" orcid_pending="0000-0003-4850-3398"
|
||||
>Clavel, Chloé</creator>
|
||||
<dateofacceptance>2022-08-08</dateofacceptance>
|
||||
<description>&lt;strong>1. Description&lt;/strong> This repository
|
||||
contains&lt;strong> tools to automatically analyse how participants align
|
||||
their use of task-specific referents in their dialogue and actions for a
|
||||
collaborative learning activity, and how it relates to the task
|
||||
success&lt;/strong> (i.e. their learning outcomes and task performance). As
|
||||
a use case, it processes data from a collaborative problem solving activity
|
||||
named JUSThink [1, 2], i.e. JUSThink Dialogue and Actions Corpus data set that
|
||||
is available from the Zenodo Repository, DOI: 10.5281/zenodo.4627104, and
|
||||
reproduces the results and figures in [3]. In brief: &lt;strong>JUSThink
|
||||
Dialogue and Actions Corpus&lt;/strong> contains transcripts, event logs,
|
||||
and test responses of children aged 9 through 12, as they participate in the
|
||||
JUSThink activity [1, 2] in pairs of two, to solve a problem on graphs together.
|
||||
&lt;strong>The JUSThink activity and its study&lt;/strong> is first
|
||||
described in [1], and elaborated with findings concerning the link between
|
||||
children's learning, performance in the activity, and perception of self, the
|
||||
other and the robot in [2]. &lt;strong>Alignment analysis in our work
|
||||
[3]&lt;/strong> studies the participants' use of expressions that are
|
||||
related to the task at hand, their follow up actions of these expressions, and
|
||||
how it links to task success. &lt;strong>Changes in Release
|
||||
v1.1.0:&lt;/strong> updated with the publication information, finalized
|
||||
paper structure, research questions and hypotheses as in the published article:
|
||||
U. Norman*&lt;em>, &lt;/em>T. Dinkar*, B. Bruno, and C. Clavel,
|
||||
"Studying Alignment in a Collaborative Learning Activity via Automatic Methods:
|
||||
The Link Between What We Say and Do," Dialogue &amp;amp; Discourse, 13(2),
|
||||
1–48. *Contributed equally to this work. 10.5210/dad.2022.201.
|
||||
&lt;strong>Full Changelog:&lt;/strong>
|
||||
https://github.com/chili-epfl/justhink-alignment-analysis/compare/v1.0.0...v1.1.0
|
||||
&lt;strong>2. Publications&lt;/strong> If you use this work in an
|
||||
academic context, please cite the following publications: Norman*, U., Dinkar*,
|
||||
T., Bruno, B., &amp;amp; Clavel, C. (2022). Studying Alignment in a
|
||||
Collaborative Learning Activity via Automatic Methods: The Link Between What We
|
||||
Say and Do. Dialogue &amp;amp; Discourse, 13(2), 1–48. *Contributed equally
|
||||
to this work. https://doi.org/10.5210/dad.2022.201 Norman, U., Dinkar, T.,
|
||||
Bruno, B., &amp;amp; Clavel, C. (2021). JUSThink Alignment Analysis. In
|
||||
Dialogue &amp;amp; Discourse (v1.1.0, Vol. 13, Number 2, pp. 1–48). Zenodo.
|
||||
https://doi.org/10.5281/zenodo.6974562 &lt;strong>3. Content&lt;/strong>
|
||||
The tools provided in this repository consists of 7 Jupyter Notebooks written in
|
||||
Python 3, and two additional external tools utilised by the notebooks.
|
||||
&lt;strong>3.1. Jupyter Notebooks&lt;/strong> We highlight that the
|
||||
notebooks up until the last (i.e. to test the hypotheses
|
||||
(tools/7_test_the_hypotheses.ipynb)) present a general pipeline to process event
|
||||
logs, test responses and transcripts to extract measures of task performance,
|
||||
learning outcomes, and measures of alignment. &lt;strong>Extract task
|
||||
performance (and other features) from the logs
|
||||
&lt;/strong>(tools/1_extract_performance_and_other_features_from_logs.ipynb):
|
||||
Extracts various measures of task behaviour from the logs, at varying
|
||||
granularities of the activity (i.e. the whole corpus, task, attempt, and turn
|
||||
levels). In later notebooks, we focus on one of the features to estimate the
|
||||
task performance of a team: (minimum) error. &lt;strong>Extract learning
|
||||
outcomes from the test responses&lt;/strong>
|
||||
(tools/2_extract_learning_gain_from_test_responses.ipynb): Extracts measures of
|
||||
learning outcomes from the responses to the pre-test and the post-test. In later
|
||||
notebooks, we focus on one of the features to estimate the learning outcome of a
|
||||
team: relative learning gain [4] &lt;strong>Select and visualise a subset of
|
||||
teams for transcription&lt;/strong>
|
||||
(tools/3_visualise_transcribed_teams.ipynb): Visualises the transcribed teams
|
||||
among the other teams in the feature space spanned by task performance and
|
||||
learning outcome, as well as the distribution of their number of attempts and
|
||||
turns. &lt;strong>Extract routines from transcripts&lt;/strong>
|
||||
(tools/4_extract_routines_from_transcripts.ipynb) (uses dialign to extract
|
||||
routines): Extracts routines of referring expressions that are "fixed", i.e.
|
||||
become shared or established amongst interlocutors. &lt;strong>Combine
|
||||
transcripts with logs&lt;/strong>
|
||||
(tools/5_construct_the_corpus_by_combining_transcripts_with_logs.ipynb): Merges
|
||||
transcripts with event logs to have a combined dialogue and actions corpus, to
|
||||
be processed e.g. to detect follow-up actions. &lt;strong>Recognise
|
||||
instructions and detect follow-up actions&lt;/strong>
|
||||
(tools/6_recognise_instructions_detect_follow-up_actions.ipynb): Extracts
|
||||
verbalised instruction such as "connect Mount Basel to Montreux", and pairs them
|
||||
with the follow-up action that may &lt;em>match&lt;/em> (e.g. if the
|
||||
other connects Basel to Montreux) or &lt;em>mismatch&lt;/em> (e.g. if
|
||||
the other connects Basel to Neuchatel) with the instruction. &lt;strong>Test
|
||||
the hypotheses &lt;/strong>in [3] (tools/7_test_the_hypotheses.ipynb) (uses
|
||||
&lt;strong>effsize&lt;/strong> to estimate effect size, specifically
|
||||
Cliff's Delta): Considers each research questions and hypotheses studied in [3]
|
||||
and generates the results in [3]. &lt;strong>3.2. External
|
||||
Tools&lt;/strong> &lt;strong>dialign tool&lt;/strong> to extract
|
||||
routines, specifically Release 1.0 from dialign-1.0.zip:&lt;br> It extracts
|
||||
routine expressions that are "shared" among the participants from transcripts.
|
||||
&lt;br> It is used as an external module (in accordance with its CeCILL-B
|
||||
License, see &lt;strong>License&lt;/strong>). &lt;strong>effsize
|
||||
tool&lt;/strong> to compute estimators of effect size.&lt;br> We
|
||||
specifically use it to compute Cliff's Delta, which quantifies the amount
|
||||
difference between two groups of observations, by computing the Cliff's Delta
|
||||
statistic.&lt;br> It is taken from project DABEST (see
|
||||
&lt;strong>License&lt;/strong>). &lt;strong>4. Research Questions
|
||||
and Hypotheses in [3]&lt;/strong> &lt;strong>RQ1 Lexical
|
||||
alignment&lt;/strong>: How do the interlocutors &lt;em>use&lt;/em>
|
||||
expressions related to the task? Is this associated with task success?
|
||||
&lt;strong>H1.1&lt;/strong>: Task-specific referents become routine
|
||||
early for more successful teams. &lt;strong>H1.2&lt;/strong>: Hesitation
|
||||
phenomena are more likely to occur in the vicinity of priming and establishment
|
||||
of task-specific referents for more successful teams. &lt;strong>RQ2
|
||||
Behavioural alignment&lt;/strong>: How do the interlocutors
|
||||
&lt;em>follow up&lt;/em> these expressions with actions? Is this
|
||||
associated with task success? &lt;strong>H2.1&lt;/strong>: Instructions
|
||||
are more likely to be followed by a corresponding action early in the dialogue
|
||||
for more successful teams. &lt;strong>H2.2&lt;/strong>: When
|
||||
instructions are followed by a corresponding or a different action, the action
|
||||
is more likely to be in the vicinity of information management phenomena for
|
||||
more successful teams. The RQs and Hs are addressed in the notebook for testing
|
||||
the hypotheses (i.e. tools/7_test_the_hypotheses.ipynb).
|
||||
&lt;strong>Acknowledgements&lt;/strong> This project has received
|
||||
funding from the European Union's Horizon 2020 research and innovation programme
|
||||
under grant agreement No 765955. Namely, the ANIMATAS Project.
|
||||
&lt;strong>License&lt;/strong> The whole package is under MIT License,
|
||||
see the &lt;strong>LICENSE&lt;/strong> file. Classes under the
|
||||
&lt;strong>tools/effsize&lt;/strong> package were taken from project
|
||||
&lt;strong>DABEST&lt;/strong>, Copyright 2016-2020 Joses W. Ho. These
|
||||
classes are licensed under the BSD 3-Clause Clear License. See
|
||||
&lt;strong>tools/effsize/LICENSE&lt;/strong> file for additional
|
||||
details. Classes under the &lt;strong>tools/dialign-1.0&lt;/strong>
|
||||
package were taken from project &lt;strong>dialign&lt;/strong>. These
|
||||
classes are licensed under the CeCILL-B License. This package is used as an
|
||||
"external module", see&lt;strong>
|
||||
tools/dialign-1.0/LICENSE.txt&lt;/strong> for additional
|
||||
details.</description>
|
||||
<description>{"references": ["[1] J. Nasir, U. Norman, B. Bruno, and P. Dillenbourg,
|
||||
\"You Tell, I Do, and We Swap until we Connect All the Gold Mines!,\" ERCIM
|
||||
News, vol. 2020, no. 120, 2020, [Online]. Available:
|
||||
https://ercim-news.ercim.eu/en120/special/you-tell-i-do-and-we-swap-until-we-connect-all-the-gold-mines",
|
||||
"[2] J. Nasir*, U. Norman*, B. Bruno, and P. Dillenbourg, \"When Positive
|
||||
Perception of the Robot Has No Effect on Learning,\" in 2020 29th IEEE
|
||||
International Conference on Robot and Human Interactive Communication (RO-MAN),
|
||||
Aug. 2020, pp. 313\u2013320, doi: 10.1109/RO-MAN47096.2020.9223343", "[3] U.
|
||||
Norman*, T. Dinkar*, B. Bruno, and C. Clavel, \"Studying Alignment in a
|
||||
Collaborative Learning Activity via Automatic Methods: The Link Between What We
|
||||
Say and Do,\" Dialogue &amp;amp; Discourse, vol. 13, no. 2, pp. 1\u201348,
|
||||
Aug. 2022, doi: 10.5210/dad.2022.201.", "[4] M. Sangin, G. Molinari, M.-A.
|
||||
N\u00fcssli, and P. Dillenbourg, \"Facilitating peer knowledge modeling: Effects
|
||||
of a knowledge awareness tool on collaborative learning outcomes and
|
||||
processes,\" Computers in Human Behavior, vol. 27, no. 3, pp. 1059\u20131067,
|
||||
May 2011, doi: 10.1016/j.chb.2010.05.032."]}</description>
|
||||
<subject classid="keyword" classname="keyword"
|
||||
schemeid="dnet:subject_classification_typologies"
|
||||
schemename="dnet:subject_classification_typologies" inferred="false"
|
||||
provenanceaction="sysimport:crosswalk:repository" trust="0.9"
|
||||
>alignment</subject>
|
||||
<subject classid="keyword" classname="keyword"
|
||||
schemeid="dnet:subject_classification_typologies"
|
||||
schemename="dnet:subject_classification_typologies" inferred="false"
|
||||
provenanceaction="sysimport:crosswalk:repository" trust="0.9">situated
|
||||
dialogue</subject>
|
||||
<subject classid="keyword" classname="keyword"
|
||||
schemeid="dnet:subject_classification_typologies"
|
||||
schemename="dnet:subject_classification_typologies" inferred="false"
|
||||
provenanceaction="sysimport:crosswalk:repository" trust="0.9">collaborative
|
||||
learning</subject>
|
||||
<subject classid="keyword" classname="keyword"
|
||||
schemeid="dnet:subject_classification_typologies"
|
||||
schemename="dnet:subject_classification_typologies" inferred="false"
|
||||
provenanceaction="sysimport:crosswalk:repository" trust="0.9">spontaneous
|
||||
speech</subject>
|
||||
<subject classid="keyword" classname="keyword"
|
||||
schemeid="dnet:subject_classification_typologies"
|
||||
schemename="dnet:subject_classification_typologies" inferred="false"
|
||||
provenanceaction="sysimport:crosswalk:repository" trust="0.9"
|
||||
>disfluency</subject>
|
||||
<subject classid="keyword" classname="keyword"
|
||||
schemeid="dnet:subject_classification_typologies"
|
||||
schemename="dnet:subject_classification_typologies" inferred="false"
|
||||
provenanceaction="sysimport:crosswalk:repository" trust="0.9">mutual
|
||||
understanding</subject>
|
||||
<language classid="eng" classname="English" schemeid="dnet:languages"
|
||||
schemename="dnet:languages"/>
|
||||
<relevantdate classid="issued" classname="issued" schemeid="dnet:dataCite_date"
|
||||
schemename="dnet:dataCite_date" inferred="false"
|
||||
provenanceaction="sysimport:crosswalk:repository" trust="0.9"
|
||||
>2021-04-09</relevantdate>
|
||||
<relevantdate classid="issued" classname="issued" schemeid="dnet:dataCite_date"
|
||||
schemename="dnet:dataCite_date" inferred="false"
|
||||
provenanceaction="sysimport:crosswalk:repository" trust="0.9"
|
||||
>2022-08-08</relevantdate>
|
||||
<publisher>Zenodo</publisher>
|
||||
<resulttype classid="software" classname="software"
|
||||
schemeid="dnet:result_typologies" schemename="dnet:result_typologies"/>
|
||||
<resourcetype classid="UNKNOWN" classname="UNKNOWN"
|
||||
schemeid="dnet:dataCite_resource" schemename="dnet:dataCite_resource"/>
|
||||
<programmingLanguage/>
|
||||
<context id="EC" label="European Commission" type="funding">
|
||||
<category id="EC::H2020" label="Horizon 2020 Framework Programme">
|
||||
<concept id="EC::H2020::MSCA-ITN-ETN" label="European Training Networks"/>
|
||||
</category>
|
||||
</context>
|
||||
<eoscifguidelines code="EOSC::Jupyter Notebook"
|
||||
label="EOSC::Jupyter Notebook"
|
||||
url=""
|
||||
semanticrelation="compliesWith"/>
|
||||
<datainfo>
|
||||
<inferred>true</inferred>
|
||||
<deletedbyinference>false</deletedbyinference>
|
||||
<trust>0.8</trust>
|
||||
<inferenceprovenance>dedup-result-decisiontree-v3</inferenceprovenance>
|
||||
<provenanceaction classid="sysimport:dedup" classname="Inferred by OpenAIRE"
|
||||
schemeid="dnet:provenanceActions" schemename="dnet:provenanceActions"/>
|
||||
</datainfo>
|
||||
<rels>
|
||||
<rel inferred="false" trust="0.9" inferenceprovenance=""
|
||||
provenanceaction="sysimport:actionset">
|
||||
<to class="IsSupplementTo" scheme="dnet:result_result_relations"
|
||||
type="publication">doi_dedup___::ae235765bbc422195a6c9f632b2d77eb</to>
|
||||
<collectedfrom name="arXiv.org e-Print Archive"
|
||||
id="opendoar____::6f4922f45568161a8cdf4ad2299f6d23"/>
|
||||
<pid classid="arXiv" classname="arXiv" schemeid="dnet:pid_types"
|
||||
schemename="dnet:pid_types" inferred="false"
|
||||
provenanceaction="sysimport:crosswalk:repository" trust="0.9"
|
||||
>2104.04429</pid>
|
||||
<collectedfrom name="Infoscience - EPFL scientific publications"
|
||||
id="opendoar____::eecca5b6365d9607ee5a9d336962c534"/>
|
||||
<publisher>arXiv</publisher>
|
||||
<collectedfrom name="Crossref"
|
||||
id="openaire____::081b82f96300b6a6e3d282bad31cb6e2"/>
|
||||
<dateofacceptance>2022-08-05</dateofacceptance>
|
||||
<title classid="main title" classname="main title"
|
||||
schemeid="dnet:dataCite_title" schemename="dnet:dataCite_title">Studying
|
||||
Alignment in a Collaborative Learning Activity via Automatic Methods:
|
||||
The Link Between What We Say and Do</title>
|
||||
<collectedfrom name="ORCID"
|
||||
id="openaire____::806360c771262b4d6770e7cdf04b5c5a"/>
|
||||
<collectedfrom name="Datacite"
|
||||
id="openaire____::9e3be59865b2c1c335d32dae2fe7b254"/>
|
||||
<pid classid="doi" classname="Digital Object Identifier"
|
||||
schemeid="dnet:pid_types" schemename="dnet:pid_types" inferred="false"
|
||||
provenanceaction="sysimport:actionset" trust="0.9"
|
||||
>10.48550/arxiv.2104.04429</pid>
|
||||
<pid classid="doi" classname="Digital Object Identifier"
|
||||
schemeid="dnet:pid_types" schemename="dnet:pid_types"
|
||||
>10.5210/dad.2022.201</pid>
|
||||
</rel>
|
||||
<rel inferred="false" trust="0.9" inferenceprovenance=""
|
||||
provenanceaction="sysimport:actionset">
|
||||
<to class="isProducedBy" scheme="dnet:result_project_relations"
|
||||
type="project">corda__h2020::c4515ebef538a734cf11f795347f5dac</to>
|
||||
<title>Advancing intuitive human-machine interaction with human-like social
|
||||
capabilities for education in schools</title>
|
||||
<code>765955</code>
|
||||
<funding>
|
||||
<funder id="ec__________::EC" shortname="EC" name="European Commission"
|
||||
jurisdiction="EU"/>
|
||||
<funding_level_0 name="H2020">ec__________::EC::H2020</funding_level_0>
|
||||
<funding_level_1 name="MSCA-ITN-ETN"
|
||||
>ec__________::EC::H2020::MSCA-ITN-ETN</funding_level_1>
|
||||
</funding>
|
||||
<acronym>ANIMATAS</acronym>
|
||||
</rel>
|
||||
<rel inferred="false" trust="0.9" inferenceprovenance=""
|
||||
provenanceaction="sysimport:actionset">
|
||||
<to class="IsSupplementedBy" scheme="dnet:result_result_relations"
|
||||
type="dataset">doi_dedup___::0a6314b0ed275d915f5b57a259375691</to>
|
||||
<dateofacceptance>2021-03-22</dateofacceptance>
|
||||
<publisher>Zenodo</publisher>
|
||||
<pid classid="doi" classname="Digital Object Identifier"
|
||||
schemeid="dnet:pid_types" schemename="dnet:pid_types" inferred="false"
|
||||
provenanceaction="sysimport:crosswalk:repository" trust="0.9"
|
||||
>10.5281/zenodo.4627104</pid>
|
||||
<title classid="main title" classname="main title"
|
||||
schemeid="dnet:dataCite_title" schemename="dnet:dataCite_title"
|
||||
inferred="false" provenanceaction="sysimport:crosswalk:repository"
|
||||
trust="0.9">JUSThink Dialogue and Actions Corpus</title>
|
||||
<pid classid="doi" classname="Digital Object Identifier"
|
||||
schemeid="dnet:pid_types" schemename="dnet:pid_types" inferred="false"
|
||||
provenanceaction="sysimport:actionset" trust="0.9"
|
||||
>10.5281/zenodo.4627103</pid>
|
||||
<collectedfrom name="ZENODO"
|
||||
id="opendoar____::358aee4cc897452c00244351e4d91f69"/>
|
||||
<collectedfrom name="Datacite"
|
||||
id="openaire____::9e3be59865b2c1c335d32dae2fe7b254"/>
|
||||
</rel>
|
||||
</rels>
|
||||
<children>
|
||||
<result objidentifier="doi_________::c054151b6a8c4f41c7acf160651a6503">
|
||||
<publisher>Zenodo</publisher>
|
||||
<pid classid="doi" classname="Digital Object Identifier"
|
||||
schemeid="dnet:pid_types" schemename="dnet:pid_types" inferred="false"
|
||||
provenanceaction="sysimport:crosswalk:repository" trust="0.9"
|
||||
>10.5281/zenodo.4675070</pid>
|
||||
<title classid="main title" classname="main title"
|
||||
schemeid="dnet:dataCite_title" schemename="dnet:dataCite_title"
|
||||
inferred="false" provenanceaction="sysimport:crosswalk:repository"
|
||||
trust="0.9">JUSThink Alignment Analysis</title>
|
||||
<dateofacceptance>2021-04-09</dateofacceptance>
|
||||
<collectedfrom name="ZENODO"
|
||||
id="opendoar____::358aee4cc897452c00244351e4d91f69"/>
|
||||
</result>
|
||||
<result objidentifier="doi_________::04aaa160a921cafdc90e03483de0a26f">
|
||||
<dateofacceptance>2022-08-08</dateofacceptance>
|
||||
<publisher>Zenodo</publisher>
|
||||
<pid classid="doi" classname="Digital Object Identifier"
|
||||
schemeid="dnet:pid_types" schemename="dnet:pid_types" inferred="false"
|
||||
provenanceaction="sysimport:crosswalk:repository" trust="0.9"
|
||||
>10.5281/zenodo.6974562</pid>
|
||||
<collectedfrom name="ZENODO"
|
||||
id="opendoar____::358aee4cc897452c00244351e4d91f69"/>
|
||||
<title classid="main title" classname="main title"
|
||||
schemeid="dnet:dataCite_title" schemename="dnet:dataCite_title"
|
||||
inferred="false" provenanceaction="sysimport:crosswalk:repository"
|
||||
trust="0.9">JUSThink Alignment Analysis (v1.1.0)</title>
|
||||
</result>
|
||||
<result objidentifier="doi_________::684a8fbe0ff09f288e9d29db897233bb">
|
||||
<title classid="main title" classname="main title"
|
||||
schemeid="dnet:dataCite_title" schemename="dnet:dataCite_title">JUSThink
|
||||
Alignment Analysis (v1.1.0)</title>
|
||||
<dateofacceptance>2022-08-08</dateofacceptance>
|
||||
<publisher>Zenodo</publisher>
|
||||
<pid classid="doi" classname="Digital Object Identifier"
|
||||
schemeid="dnet:pid_types" schemename="dnet:pid_types" inferred="false"
|
||||
provenanceaction="sysimport:actionset" trust="0.9"
|
||||
>10.5281/zenodo.4675069</pid>
|
||||
<collectedfrom name="Datacite"
|
||||
id="openaire____::9e3be59865b2c1c335d32dae2fe7b254"/>
|
||||
</result>
|
||||
<instance>
|
||||
<accessright classid="OPEN" classname="Open Access"
|
||||
schemeid="dnet:access_modes" schemename="dnet:access_modes"/>
|
||||
<collectedfrom name="Datacite"
|
||||
id="openaire____::9e3be59865b2c1c335d32dae2fe7b254"/>
|
||||
<hostedby name="ZENODO" id="opendoar____::358aee4cc897452c00244351e4d91f69"/>
|
||||
<dateofacceptance>2022-08-08</dateofacceptance>
|
||||
<instancetype classid="0029" classname="Software"
|
||||
schemeid="dnet:publication_resource"
|
||||
schemename="dnet:publication_resource"/>
|
||||
<pid classid="doi" classname="Digital Object Identifier"
|
||||
schemeid="dnet:pid_types" schemename="dnet:pid_types" inferred="false"
|
||||
provenanceaction="sysimport:actionset" trust="0.9"
|
||||
>10.5281/zenodo.4675069</pid>
|
||||
<refereed classid="0000" classname="UNKNOWN" schemeid="dnet:review_levels"
|
||||
schemename="dnet:review_levels"/>
|
||||
<license>https://opensource.org/licenses/MIT</license>
|
||||
<webresource>
|
||||
<url>https://doi.org/10.5281/zenodo.4675069</url>
|
||||
</webresource>
|
||||
</instance>
|
||||
<instance>
|
||||
<accessright classid="OPEN" classname="Open Access"
|
||||
schemeid="dnet:access_modes" schemename="dnet:access_modes"/>
|
||||
<collectedfrom name="ZENODO"
|
||||
id="opendoar____::358aee4cc897452c00244351e4d91f69"/>
|
||||
<hostedby name="ZENODO" id="opendoar____::358aee4cc897452c00244351e4d91f69"/>
|
||||
<dateofacceptance>2022-08-08</dateofacceptance>
|
||||
<instancetype classid="0029" classname="Software"
|
||||
schemeid="dnet:publication_resource"
|
||||
schemename="dnet:publication_resource"/>
|
||||
<pid classid="doi" classname="Digital Object Identifier"
|
||||
schemeid="dnet:pid_types" schemename="dnet:pid_types" inferred="false"
|
||||
provenanceaction="sysimport:crosswalk:repository" trust="0.9"
|
||||
>10.5281/zenodo.6974562</pid>
|
||||
<refereed classid="0000" classname="UNKNOWN" schemeid="dnet:review_levels"
|
||||
schemename="dnet:review_levels"/>
|
||||
<license>https://opensource.org/licenses/MIT</license>
|
||||
<webresource>
|
||||
<url>https://doi.org/10.5281/zenodo.6974562</url>
|
||||
</webresource>
|
||||
</instance>
|
||||
<instance>
|
||||
<accessright classid="OPEN" classname="Open Access"
|
||||
schemeid="dnet:access_modes" schemename="dnet:access_modes"/>
|
||||
<collectedfrom name="ZENODO"
|
||||
id="opendoar____::358aee4cc897452c00244351e4d91f69"/>
|
||||
<hostedby name="ZENODO" id="opendoar____::358aee4cc897452c00244351e4d91f69"/>
|
||||
<dateofacceptance>2021-04-09</dateofacceptance>
|
||||
<instancetype classid="0029" classname="Software"
|
||||
schemeid="dnet:publication_resource"
|
||||
schemename="dnet:publication_resource"/>
|
||||
<pid classid="doi" classname="Digital Object Identifier"
|
||||
schemeid="dnet:pid_types" schemename="dnet:pid_types" inferred="false"
|
||||
provenanceaction="sysimport:crosswalk:repository" trust="0.9"
|
||||
>10.5281/zenodo.4675070</pid>
|
||||
<refereed classid="0000" classname="UNKNOWN" schemeid="dnet:review_levels"
|
||||
schemename="dnet:review_levels"/>
|
||||
<license>https://opensource.org/licenses/MIT</license>
|
||||
<webresource>
|
||||
<url>https://doi.org/10.5281/zenodo.4675070</url>
|
||||
</webresource>
|
||||
</instance>
|
||||
</children>
|
||||
</oaf:result>
|
||||
</oaf:entity>
|
||||
</metadata>
|
||||
</result>
|
||||
</record>
|
|
@ -3,10 +3,10 @@
|
|||
<FIELD indexable="false" name="oafentity" result="true" stat="false" tokenizable="false" xpath="//*[local-name() = 'entity']"/>
|
||||
<FIELD indexable="true" name="oaftype" result="false" stat="false" tokenizable="false" value="local-name(//*[local-name()='entity']/*[local-name() != 'extraInfo'])"/>
|
||||
<FIELD indexable="true" name="objIdentifier" result="false" stat="false" tokenizable="false" xpath="//header/dri:objIdentifier"/><!-- DATASOURCE FIELDS -->
|
||||
<FIELD indexable="true" name="datasourceofficialname" result="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/officialname"/>
|
||||
<FIELD indexable="true" name="datasourceenglishname" result="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/englishname"/>
|
||||
<FIELD indexable="true" name="datasourceoddescription" result="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/oddescription"/>
|
||||
<FIELD indexable="true" name="datasourceodsubjects" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/odsubjects"/>
|
||||
<FIELD copy="true" indexable="true" name="datasourceofficialname" result="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/officialname"/>
|
||||
<FIELD copy="true" indexable="true" name="datasourceenglishname" result="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/englishname"/>
|
||||
<FIELD copy="true" indexable="true" name="datasourceoddescription" result="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/oddescription"/>
|
||||
<FIELD copy="true" indexable="true" name="datasourceodsubjects" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/odsubjects"/>
|
||||
<FIELD indexable="true" name="datasourceodlanguages" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/odlanguages"/>
|
||||
<FIELD indexable="true" name="datasourceodcontenttypes" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/odcontenttypes"/>
|
||||
<FIELD indexable="true" multivalued="false" name="datasourcetypename" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/datasourcetype/@classname"/>
|
||||
|
@ -14,17 +14,16 @@
|
|||
<FIELD indexable="true" multivalued="false" name="datasourcetypeuiname" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/datasourcetypeui/@classname"/>
|
||||
<FIELD indexable="true" multivalued="false" name="datasourcecompatibilityid" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/openairecompatibility/@classid"/>
|
||||
<FIELD indexable="true" multivalued="false" name="datasourcecompatibilityname" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/openairecompatibility/@classname"/>
|
||||
<FIELD indexable="true" multivalued="true" name="datasourcesubject" result="false" stat="false" type="ngramtext" xpath="//*[local-name()='entity']/*[local-name()='datasource']/subjects"/>
|
||||
<FIELD indexable="true" name="versioning" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/versioning"/>
|
||||
<!-- datasource fields for EOSC -->
|
||||
<FIELD indexable="true" name="datasourcejurisdiction" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/jurisdiction"/>
|
||||
<FIELD copy="true" indexable="true" multivalued="true" name="datasourcesubject" result="false" stat="false" type="ngramtext" xpath="//*[local-name()='entity']/*[local-name()='datasource']/subjects"/>
|
||||
<FIELD indexable="true" name="versioning" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/versioning"/><!-- datasource fields for EOSC -->
|
||||
<FIELD indexable="true" name="datasourcejurisdiction" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/jurisdiction/@classname"/>
|
||||
<FIELD indexable="true" name="datasourcethematic" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/thematic"/>
|
||||
<FIELD indexable="true" name="datasourceknowledge_graph" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/knowledgegraph"/>
|
||||
<FIELD indexable="true" name="datasourcecontentpolicy" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/contentpolicy"/>
|
||||
<!-- ORGANIZATION FIELDS -->
|
||||
<FIELD indexable="true" name="organizationlegalshortname" result="false" stat="false" type="ngramtext" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='organization']//legalshortname)"/>
|
||||
<FIELD indexable="true" name="organizationlegalname" result="false" stat="false" type="ngramtext" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='organization']//legalname)"/>
|
||||
<FIELD indexable="true" name="organizationalternativenames" result="false" stat="false" type="ngramtext" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='organization']//alternativeNames)"/>
|
||||
<FIELD indexable="true" name="datasourcecontentpolicy" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/contentpolicy/@classname"/>
|
||||
<FIELD indexable="true" name="eosctype" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/eosctype/@classname"/>
|
||||
<FIELD indexable="true" name="eoscdatasourcetype" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/eoscdatasourcetype/@classname"/><!-- ORGANIZATION FIELDS -->
|
||||
<FIELD copy="true" indexable="true" name="organizationlegalshortname" result="false" stat="false" type="ngramtext" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='organization']//legalshortname)"/>
|
||||
<FIELD copy="true" indexable="true" name="organizationlegalname" result="false" stat="false" type="ngramtext" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='organization']//legalname)"/>
|
||||
<FIELD copy="true" indexable="true" name="organizationalternativenames" result="false" stat="false" type="ngramtext" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='organization']//alternativeNames)"/>
|
||||
<FIELD indexable="true" name="organizationeclegalbody" result="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='organization']/eclegalbody"/>
|
||||
<FIELD indexable="true" name="organizationeclegalperson" result="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='organization']/eclegalperson"/>
|
||||
<FIELD indexable="true" name="organizationecnonprofit" result="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='organization']/ecnonprofit"/>
|
||||
|
@ -34,18 +33,17 @@
|
|||
<FIELD indexable="true" name="organizationecenterprise" result="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='organization']/ecenterprise"/>
|
||||
<FIELD indexable="true" name="organizationecsmevalidated" result="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='organization']/ecsmevalidated"/>
|
||||
<FIELD indexable="true" name="organizationecnutscode" result="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='organization']/ecnutscode"/>
|
||||
<FIELD indexable="true" multivalued="false" name="organizationcountryname" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='organization']/country/@classname"/>
|
||||
<!-- PROJECT FIELDS -->
|
||||
<FIELD indexable="true" name="projectcode" result="false" stat="false" type="ngramtext" xpath="//*[local-name()='entity']/*[local-name()='project']/code"/>
|
||||
<FIELD indexable="true" multivalued="false" name="organizationcountryname" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='organization']/country/@classname"/><!-- PROJECT FIELDS -->
|
||||
<FIELD copy="true" indexable="true" name="projectcode" result="false" stat="false" type="ngramtext" xpath="//*[local-name()='entity']/*[local-name()='project']/code"/>
|
||||
<FIELD indexable="true" name="projectcode_nt" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='project']/code"/>
|
||||
<FIELD indexable="true" name="projectacronym" result="false" stat="false" type="ngramtext" xpath="//*[local-name()='entity']/*[local-name()='project']/acronym"/>
|
||||
<FIELD indexable="true" name="projecttitle" result="false" stat="false" type="ngramtext" xpath="//*[local-name()='entity']/*[local-name()='project']/title"/>
|
||||
<FIELD indexable="true" multivalued="false" name="projectstartdate" result="false" stat="false" type="pdate" value="//*[local-name()='entity']/*[local-name()='project']/startdate"/>
|
||||
<FIELD copy="true" indexable="true" name="projectacronym" result="false" stat="false" type="ngramtext" xpath="//*[local-name()='entity']/*[local-name()='project']/acronym"/>
|
||||
<FIELD copy="true" indexable="true" name="projecttitle" result="false" stat="false" type="ngramtext" xpath="//*[local-name()='entity']/*[local-name()='project']/title"/>
|
||||
<FIELD indexable="true" multivalued="false" name="projectstartdate" result="false" stat="false" type="date" value="//*[local-name()='entity']/*[local-name()='project']/startdate"/>
|
||||
<FIELD indexable="true" multivalued="false" name="projectstartyear" result="false" stat="false" tokenizable="false" value="dnet:extractYear(//*[local-name()='entity']/*[local-name()='project']/startdate)"/>
|
||||
<FIELD indexable="true" multivalued="false" name="projectenddate" result="false" stat="false" type="pdate" value="//*[local-name()='entity']/*[local-name()='project']/enddate"/>
|
||||
<FIELD indexable="true" multivalued="false" name="projectenddate" result="false" stat="false" type="date" value="//*[local-name()='entity']/*[local-name()='project']/enddate"/>
|
||||
<FIELD indexable="true" multivalued="false" name="projectendyear" result="false" stat="false" tokenizable="false" value="dnet:extractYear(//*[local-name()='entity']/*[local-name()='project']/enddate)"/>
|
||||
<FIELD indexable="true" multivalued="false" name="projectcallidentifier" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='project']/callidentifier"/>
|
||||
<FIELD indexable="true" name="projectkeywords" result="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='project']/keywords"/>
|
||||
<FIELD copy="true" indexable="true" name="projectkeywords" result="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='project']/keywords"/>
|
||||
<FIELD indexable="true" multivalued="false" name="projectduration" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='project']/duration"/>
|
||||
<FIELD indexable="true" multivalued="false" name="projectecsc39" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='project']/ecsc39)"/>
|
||||
<FIELD indexable="true" multivalued="false" name="projectoamandatepublications" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='project']/oamandatepublications"/>
|
||||
|
@ -54,35 +52,36 @@
|
|||
<FIELD indexable="true" multivalued="false" name="projectcontracttypename" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='project']/contracttype/@classname"/>
|
||||
<FIELD indexable="true" name="fundinglevel0_id" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='project']/fundingtree//funding_level_0/id"/>
|
||||
<FIELD indexable="true" name="fundinglevel0_name" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='project']/fundingtree//funding_level_0/name"/>
|
||||
<FIELD indexable="true" name="fundinglevel0_description" result="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='project']/fundingtree//funding_level_0/description"/>
|
||||
<FIELD copy="true" indexable="true" name="fundinglevel0_description" result="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='project']/fundingtree//funding_level_0/description"/>
|
||||
<FIELD indexable="true" name="fundinglevel1_id" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='project']/fundingtree//funding_level_1/id"/>
|
||||
<FIELD indexable="true" name="fundinglevel1_name" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='project']/fundingtree//funding_level_1/name"/>
|
||||
<FIELD indexable="true" name="fundinglevel1_description" result="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='project']/fundingtree//funding_level_1/description"/>
|
||||
<FIELD copy="true" indexable="true" name="fundinglevel1_description" result="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='project']/fundingtree//funding_level_1/description"/>
|
||||
<FIELD indexable="true" name="fundinglevel2_id" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='project']/fundingtree//funding_level_2/id"/>
|
||||
<FIELD indexable="true" name="fundinglevel2_name" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='project']/fundingtree//funding_level_2/name"/>
|
||||
<FIELD indexable="true" name="fundinglevel2_description" result="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='project']/fundingtree//funding_level_2/description"/><!-- PROJECTS' FUNDER FIELDS: indexable only with the new funding path/context handling -->
|
||||
<FIELD copy="true" indexable="true" name="fundinglevel2_description" result="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='project']/fundingtree//funding_level_2/description"/><!-- PROJECTS' FUNDER FIELDS: indexable only with the new funding path/context handling -->
|
||||
<FIELD indexable="true" name="funder" result="false" stat="false" tokenizable="false" value="concat(./id/text(), '||', ./name/text(), '||', ./shortname/text())" xpath="//*[local-name()='entity']/*[local-name()='project']/fundingtree/funder"/>
|
||||
<FIELD indexable="true" name="fundershortname" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='project']/fundingtree/funder/shortname"/>
|
||||
<FIELD indexable="true" name="funderid" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='project']/fundingtree/funder/id"/>
|
||||
<FIELD indexable="true" name="fundername" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='project']/fundingtree/funder/name"/>
|
||||
<FIELD indexable="true" name="funderoriginalname" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='project']/fundingtree/funder/originalname"/>
|
||||
<FIELD indexable="true" name="funderjurisdiction" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='project']/fundingtree/funder/jurisdiction"/><!-- RESULT FIELDS -->
|
||||
<FIELD indexable="true" name="resulttitle" result="false" stat="false" xpath="//*[local-name() = 'entity']/*[local-name() ='result']/title | //*[local-name()='entity']/*[local-name()='result']/children/result/title"/>
|
||||
<FIELD indexable="true" name="resultsubject" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='result']/subject)"/>
|
||||
<FIELD copy="true" indexable="true" name="resulttitle" result="false" stat="false" type="text_en" xpath="//*[local-name() = 'entity']/*[local-name() ='result']/title | //*[local-name()='entity']/*[local-name()='result']/children/result/title"/>
|
||||
<FIELD indexable="true" name="resultsubject" result="false" stat="false" type="text_en" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='result']/subject)"/>
|
||||
<FIELD indexable="true" name="resultsubjectclass" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='result']/subject/@classname)"/>
|
||||
<FIELD indexable="true" multivalued="false" name="resultembargoenddate" result="false" stat="false" type="pdate" value="//*[local-name()='entity']/*[local-name()='result']/embargoenddate"/>
|
||||
<FIELD indexable="true" multivalued="false" name="resultembargoenddate" result="false" stat="false" type="date" value="//*[local-name()='entity']/*[local-name()='result']/embargoenddate"/>
|
||||
<FIELD indexable="true" multivalued="false" name="resultembargoendyear" result="false" stat="false" tokenizable="false" value="dnet:extractYear(//*[local-name()='entity']/*[local-name()='result']/embargoenddate)"/>
|
||||
<FIELD indexable="true" multivalued="false" name="resulttypeid" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='result']/resulttype/@classid"/>
|
||||
<FIELD indexable="true" multivalued="false" name="resulttypename" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='result']/resulttype/@classname"/>
|
||||
<FIELD indexable="true" multivalued="false" name="resultlanguagename" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='result']/language/@classname"/>
|
||||
<FIELD indexable="true" name="resultpublisher" result="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='result']/*[local-name()='publisher']"/>
|
||||
<FIELD indexable="true" name="resultdescription" result="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='result']//*[local-name()='description']"/>
|
||||
<FIELD copy="true" indexable="true" name="resultpublisher" result="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='result']/*[local-name()='publisher']"/>
|
||||
<FIELD copy="true" indexable="true" name="resultdescription" result="false" stat="false" type="text_en" xpath="//*[local-name()='entity']/*[local-name()='result']//*[local-name()='description']"/>
|
||||
<FIELD indexable="true" name="resultlicense" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='result']/children/instance/license"/>
|
||||
<FIELD indexable="true" name="resultaccessright" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='result']/children/instance/accessright/@classname"/>
|
||||
<FIELD indexable="true" name="resultresourcetypename" result="false" stat="false" type="string_ci" xpath="//*[local-name()='entity']/*[local-name()='result']/resourcetype/@classname"/>
|
||||
<FIELD indexable="true" multivalued="false" name="resultbestaccessright" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='result']/bestaccessright/@classname)"/>
|
||||
<FIELD indexable="true" multivalued="false" name="resultdateofacceptance" result="false" stat="false" type="pdate" value="//*[local-name()='entity']/*[local-name()='result']/dateofacceptance"/>
|
||||
<FIELD indexable="true" multivalued="false" name="resultacceptanceyear" result="false" stat="false" tokenizable="false" value="dnet:extractYear(//*[local-name()='entity']/*[local-name()='result']/dateofacceptance)"/>
|
||||
<FIELD indexable="true" multivalued="true" name="resultauthor" result="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='result']/creator"/>
|
||||
<FIELD indexable="true" multivalued="false" name="resultdateofacceptance" result="false" stat="false" type="date" value="//*[local-name()='entity']/*[local-name()='result']/dateofacceptance"/>
|
||||
<FIELD copy="true" indexable="true" multivalued="false" name="resultacceptanceyear" result="false" stat="false" tokenizable="false" value="dnet:extractYear(//*[local-name()='entity']/*[local-name()='result']/dateofacceptance)"/>
|
||||
<FIELD copy="true" indexable="true" multivalued="true" name="resultauthor" result="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='result']/creator"/>
|
||||
<FIELD indexable="true" multivalued="true" name="resultauthor_nt" result="false" stat="false" type="string_ci" xpath="//*[local-name()='entity']/*[local-name()='result']/creator"/>
|
||||
<FIELD indexable="true" multivalued="true" name="authorid" result="false" stat="false" type="string_ci" xpath="//*[local-name()='entity']/*[local-name()='result']/creator/@*[local-name() != 'rank' and local-name() != 'name' and local-name() != 'surname']"/>
|
||||
<FIELD indexable="true" multivalued="true" name="authoridtype" result="false" stat="false" type="string_ci" xpath="//*[local-name()='entity']/*[local-name()='result']/creator/@*[local-name() != 'rank' and local-name() != 'name' and local-name() != 'surname']/local-name()"/>
|
||||
|
@ -94,26 +93,29 @@
|
|||
<FIELD indexable="true" name="resultdupid" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*//children/result/@objidentifier"/>
|
||||
<FIELD indexable="true" name="organizationdupid" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*//children/organization/@objidentifier"/>
|
||||
<FIELD indexable="true" name="externalrefsite" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//children/externalreference/sitename)"/>
|
||||
<FIELD indexable="true" name="externalreflabel" result="false" stat="false" tokenizable="true" xpath="distinct-values(//*[local-name()='entity']/*//children/externalreference/label)"/>
|
||||
<FIELD copy="true" indexable="true" name="externalreflabel" result="false" stat="false" tokenizable="true" xpath="distinct-values(//*[local-name()='entity']/*//children/externalreference/label)"/>
|
||||
<FIELD indexable="true" name="externalrefclass" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//children/externalreference/qualifier/@classid)"/>
|
||||
<FIELD indexable="true" name="externalrefid" result="false" stat="false" tokenizable="false" xpath="(//*[local-name()='entity']/*//children/externalreference/refidentifier)"/>
|
||||
<FIELD indexable="true" name="resultidentifier" result="false" stat="false" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='result']/children/instance/webresource/*[local-name()='url'])"/>
|
||||
<FIELD indexable="true" name="resultsource" result="false" stat="false" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='result']/source)"/><!-- REL FIELDS -->
|
||||
<FIELD copy="true" indexable="true" name="resultidentifier" result="false" stat="false" type="string_ci" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='result']/children/instance/webresource/*[local-name()='url'])"/>
|
||||
<FIELD copy="true" indexable="true" name="resultsource" result="false" stat="false" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='result']/source)"/>
|
||||
<FIELD indexable="true" name="eoscifguidelines" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name() = 'result']/eoscifguidelines/@code)"/><!-- FOS and SDGs non tokenizable for faceted search-->
|
||||
<FIELD indexable="true" name="fos" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='result']/subject[@classid='FOS'])"/>
|
||||
<FIELD indexable="true" name="sdg" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='result']/subject[@classid='SDG'])"/><!-- REL FIELDS -->
|
||||
<FIELD indexable="true" name="reldatasourcecompatibilityid" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//rel[./to/@type='datasource']/openairecompatibility/@classid)"/>
|
||||
<FIELD indexable="true" name="relproject" result="false" stat="false" tokenizable="false" value="distinct-values(concat(./text(), '||', dnet:pickFirst(../acronym/text(), ../title/text())))" xpath="//*[local-name()='entity']/*//rel/to[@type='project']"/>
|
||||
<FIELD indexable="true" name="relprojectid" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//rel/to[@type='project'])"/>
|
||||
<FIELD indexable="true" name="relprojectcode" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//rel[./to/@type='project']/code)"/>
|
||||
<FIELD indexable="true" name="relprojectname" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//rel[./to/@type='project']/acronym)"/>
|
||||
<FIELD indexable="true" name="relprojecttitle" result="false" stat="false" xpath="distinct-values(//*[local-name()='entity']/*//rel[./to/@type='project']/title)"/>
|
||||
<FIELD copy="true" indexable="true" name="relprojectname" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//rel[./to/@type='project']/acronym)"/>
|
||||
<FIELD copy="true" indexable="true" name="relprojecttitle" result="false" stat="false" xpath="distinct-values(//*[local-name()='entity']/*//rel[./to/@type='project']/title)"/>
|
||||
<FIELD indexable="true" name="relcontracttypeid" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//rel[./to/@type='project']/contracttype/@classid)"/>
|
||||
<FIELD indexable="true" name="relcontracttypename" result="false" stat="false" xpath="distinct-values(//*[local-name()='entity']/*//rel[./to/@type='project']/contracttype/@classname)"/>
|
||||
<FIELD copy="true" indexable="true" name="relcontracttypename" result="false" stat="false" xpath="distinct-values(//*[local-name()='entity']/*//rel[./to/@type='project']/contracttype/@classname)"/>
|
||||
<FIELD indexable="true" name="relorganizationcountryid" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//rel[./to/@type='organization']/country/@classid)"/>
|
||||
<FIELD indexable="true" name="relorganizationcountryname" result="false" stat="false" xpath="distinct-values(//*[local-name()='entity']/*//rel[./to/@type='organization']/country/@classname)"/>
|
||||
<FIELD copy="true" indexable="true" name="relorganizationcountryname" result="false" stat="false" xpath="distinct-values(//*[local-name()='entity']/*//rel[./to/@type='organization']/country/@classname)"/>
|
||||
<FIELD indexable="true" name="relorganizationid" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//rel/to[@type='organization'])"/>
|
||||
<FIELD indexable="true" name="relorganizationname" result="false" stat="false" xpath="distinct-values(//*[local-name()='entity']/*//rel[./to/@type='organization']/legalname)"/>
|
||||
<FIELD indexable="true" name="relorganizationshortname" result="false" stat="false" xpath="distinct-values(//*[local-name()='entity']/*//rel[./to/@type='organization']/legalshortname)"/>
|
||||
<FIELD copy="true" indexable="true" name="relorganizationname" result="false" stat="false" xpath="distinct-values(//*[local-name()='entity']/*//rel[./to/@type='organization']/legalname)"/>
|
||||
<FIELD copy="true" indexable="true" name="relorganizationshortname" result="false" stat="false" xpath="distinct-values(//*[local-name()='entity']/*//rel[./to/@type='organization']/legalshortname)"/>
|
||||
<FIELD indexable="true" name="relresultid" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//rel/to[@type='result'])"/>
|
||||
<FIELD indexable="true" name="relresulttype" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//rel/resulttype/@classid)"/>
|
||||
<FIELD indexable="true" name="relresulttype" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//rel/to/@type)"/>
|
||||
<FIELD indexable="true" name="relclass" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//rel/to/@class)"/>
|
||||
<FIELD indexable="true" name="relfundinglevel0_id" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']//rel/funding/funding_level_0"/>
|
||||
<FIELD indexable="true" name="relfundinglevel0_name" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']//rel/funding/funding_level_0/@name/string()"/>
|
||||
|
@ -132,13 +134,15 @@
|
|||
<FIELD indexable="true" name="relfunderjurisdiction" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']//rel/funding/funder/@jurisdiction)"/><!-- Collected from of the related entity. Available for result-result relationships -->
|
||||
<FIELD indexable="true" name="relcollectedfromid" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//rel/collectedfrom/@id)"/>
|
||||
<FIELD indexable="true" name="relcollectedfromname" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//rel/collectedfrom/@name)"/>
|
||||
<FIELD indexable="true" name="relvalidated" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//rel[./validated]/to[@type='project'])"/>
|
||||
<FIELD indexable="true" name="semrelid" result="false" stat="false" tokenizable="false" value="concat(./to/text(), '||', ./to/@class/string())" xpath="//*[local-name()='entity']//rel"/><!-- COMMON FIELDS -->
|
||||
<FIELD indexable="true" multivalued="false" name="dateofcollection" result="false" stat="false" type="pdate" value="//header/*[local-name()='dateOfCollection']"/>
|
||||
<FIELD indexable="true" multivalued="false" name="dateofcollection" result="false" stat="false" type="date" value="//header/*[local-name()='dateOfCollection']"/>
|
||||
<FIELD indexable="true" name="status" result="false" stat="false" tokenizable="false" type="string_ci" xpath="//header/*[local-name()='status']"/>
|
||||
<FIELD indexable="true" name="collectedfrom" result="false" stat="false" tokenizable="false" value="distinct-values(concat(./@id, '||', ./@name))" xpath="//*[local-name()='entity']/*/*[local-name()='collectedfrom'] | //*[local-name()='entity']/*//*[local-name() = 'instance']/*[local-name()='collectedfrom']"/>
|
||||
<FIELD indexable="true" name="collectedfromdatasourceid" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*/*[local-name()='collectedfrom']/@id | //*[local-name()='entity']/*//*[local-name() = 'instance']/*[local-name()='collectedfrom']/@id)"/>
|
||||
<FIELD indexable="true" name="collectedfromname" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*/*[local-name()='collectedfrom']/@name | //*[local-name()='entity']/*//*[local-name() = 'instance']/*[local-name()='collectedfrom']/@name)"/>
|
||||
<FIELD indexable="true" name="originalid" result="false" stat="false" tokenizable="false" type="string_ci" xpath="//*[local-name()='entity']/*/*[local-name()='originalId']"/>
|
||||
<FIELD indexable="true" name="pid" result="false" stat="false" tokenizable="false" type="string_ci" xpath="distinct-values(//*[local-name()='entity']/*/pid/text()|//*[local-name()='instance']/*[local-name()='alternateidentifier']/text())"/>
|
||||
<FIELD indexable="true" name="pid" result="false" stat="false" tokenizable="false" type="string_ci" xpath="//*[local-name()='entity']/*/pid/text()"/>
|
||||
<FIELD indexable="true" name="pidclassid" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*/pid/@classid)"/>
|
||||
<FIELD indexable="true" name="pidclassname" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*/pid/@classname)"/>
|
||||
<FIELD indexable="true" name="inferred" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']//datainfo/inferred"/>
|
||||
|
@ -156,20 +160,6 @@
|
|||
<FIELD indexable="true" name="categoryname" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='result']/context/category/@label)"/>
|
||||
<FIELD indexable="true" name="conceptid" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='result']/context/category//concept/@id)"/>
|
||||
<FIELD indexable="true" name="conceptname" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='result']/context/category//concept/@label)"/><!-- new index field for country info from different xpaths for any type of entity -->
|
||||
<FIELD indexable="true" name="country" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*/country/@classid | //*[local-name()='entity']/*//rel[./to/@type='organization']/country/@classid | //*[local-name()='entity']//funder/@jurisdiction)"/><!-- COUNTER FIELDS -->
|
||||
<FIELD header="true" indexable="true" multivalued="false" name="counter_dedup" result="false" stored="true" type="pint" xpath="/record/result/*[local-name()='header']/*[local-name()='counters']/counter_dedup/@value"/>
|
||||
<FIELD header="true" indexable="true" multivalued="false" name="counter_authorship" result="false" stored="true" type="pint" xpath="/record/result/*[local-name()='header']/*[local-name()='counters']/counter_authorship/@value"/>
|
||||
<FIELD header="true" indexable="true" multivalued="false" name="counter_participation" result="false" stored="true" type="pint" xpath="/record/result/*[local-name()='header']/*[local-name()='counters']/counter_participation/@value"/>
|
||||
<FIELD header="true" indexable="true" multivalued="false" name="counter_similarity" result="false" stored="true" type="pint" xpath="/record/result/*[local-name()='header']/*[local-name()='counters']/counter_similarity/@value"/>
|
||||
<FIELD header="true" indexable="true" multivalued="false" name="counter_publicationdataset" result="false" stored="true" type="pint" xpath="/record/result/*[local-name()='header']/*[local-name()='counters']/counter_publicationDataset/@value"/>
|
||||
<FIELD header="true" indexable="true" multivalued="false" name="counter_publicationdataset_claimed" result="false" stored="true" type="pint" xpath="/record/result/*[local-name()='header']/*[local-name()='counters']/counter_publicationDataset_claimed/@value"/>
|
||||
<FIELD header="true" indexable="true" multivalued="false" name="counter_publicationdataset_collected" result="false" stored="true" type="pint" xpath="/record/result/*[local-name()='header']/*[local-name()='counters']/counter_publicationDataset_collected/@value"/>
|
||||
<FIELD header="true" indexable="true" multivalued="false" name="counter_publicationdataset_inferred" result="false" stored="true" type="pint" xpath="/record/result/*[local-name()='header']/*[local-name()='counters']/counter_publicationDataset_inferred/@value"/>
|
||||
<FIELD header="true" indexable="true" multivalued="false" name="counter_outcome" result="false" stored="true" type="pint" xpath="/record/result/*[local-name()='header']/*[local-name()='counters']/counter_outcome/@value"/>
|
||||
<FIELD header="true" indexable="true" multivalued="false" name="counter_outcome_claimed" result="false" stored="true" type="pint" xpath="/record/result/*[local-name()='header']/*[local-name()='counters']/counter_outcome_claimed/@value"/>
|
||||
<FIELD header="true" indexable="true" multivalued="false" name="counter_outcome_collected" result="false" stored="true" type="pint" xpath="/record/result/*[local-name()='header']/*[local-name()='counters']/counter_outcome_collected/@value"/>
|
||||
<FIELD header="true" indexable="true" multivalued="false" name="counter_outcome_inferred" result="false" stored="true" type="pint" xpath="/record/result/*[local-name()='header']/*[local-name()='counters']/counter_outcome_inferred/@value"/>
|
||||
<FIELD header="true" indexable="true" multivalued="false" name="counter_affiliation" result="false" stored="true" type="pint" xpath="/record/result/*[local-name()='header']/*[local-name()='counters']/counter_affiliation/@value"/>
|
||||
<FIELD header="true" indexable="true" multivalued="false" name="counter_doi" result="false" stored="true" type="pint" xpath="/record/result/*[local-name()='header']/*[local-name()='counters']/counter_doi/@value"/>
|
||||
<FIELD indexable="true" name="country" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*/country/@classid | //*[local-name()='entity']/*//rel[./to/@type='organization']/country/@classid | //*[local-name()='entity']//funder/@jurisdiction)"/>
|
||||
</FIELDS>
|
||||
</LAYOUT>
|
|
@ -21,7 +21,7 @@
|
|||
</property>
|
||||
<property>
|
||||
<name>hive_jdbc_url</name>
|
||||
<value>jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000</value>
|
||||
<value>jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000/;UseNativeQuery=1;?spark.executor.memory=19166291558;spark.yarn.executor.memoryOverhead=3225;spark.driver.memory=11596411699;spark.yarn.driver.memoryOverhead=1228</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozie.wf.workflow.notification.url</name>
|
||||
|
|
|
@ -42,7 +42,9 @@ SELECT p.id,
|
|||
CASE WHEN prr2.id IS NULL THEN 0 ELSE prr2.dp END AS delayedpubs,
|
||||
p.callidentifier,
|
||||
p.code,
|
||||
p.totalcost
|
||||
p.totalcost,
|
||||
p.fundedamount,
|
||||
p.currency
|
||||
FROM ${stats_db_name}.project_tmp p
|
||||
LEFT JOIN (SELECT pr.id, count(distinct pr.result) AS np
|
||||
FROM ${stats_db_name}.project_results pr
|
||||
|
|
|
@ -59,7 +59,7 @@ UNION ALL
|
|||
SELECT * FROM ${stats_db_name}.otherresearchproduct_sources;
|
||||
|
||||
|
||||
create table ${stats_db_name}.result_orcid STORED AS PARQUET as
|
||||
CREATE TABLE IF NOT EXISTS ${stats_db_name}.result_orcid STORED AS PARQUET as
|
||||
select distinct res.id, regexp_replace(res.orcid, 'http://orcid.org/' ,'') as orcid
|
||||
from (
|
||||
SELECT substr(res.id, 4) as id, auth_pid.value as orcid
|
||||
|
@ -69,7 +69,7 @@ from (
|
|||
LATERAL VIEW explode(auth.pid.qualifier.classid) apt as author_pid_type
|
||||
WHERE res.datainfo.deletedbyinference = FALSE and res.datainfo.invisible = FALSE and author_pid_type = 'orcid') as res;
|
||||
|
||||
create table ${stats_db_name}.result_result stored as parquet as
|
||||
CREATE TABLE IF NOT EXISTS ${stats_db_name}.result_result stored as parquet as
|
||||
select substr(rel.source, 4) as source, substr(rel.target, 4) as target, relclass, subreltype
|
||||
from ${openaire_db_name}.relation rel
|
||||
join ${openaire_db_name}.result r1 on rel.source=r1.id
|
||||
|
@ -82,7 +82,7 @@ where reltype='resultResult'
|
|||
and r2.resulttype.classname != 'other'
|
||||
and rel.datainfo.deletedbyinference=false and rel.datainfo.invisible = FALSE;
|
||||
|
||||
create table ${stats_db_name}.result_citations_oc stored as parquet as
|
||||
CREATE TABLE IF NOT EXISTS ${stats_db_name}.result_citations_oc stored as parquet as
|
||||
select substr(target, 4) as id, count(distinct substr(source, 4)) as citations
|
||||
from ${openaire_db_name}.relation rel
|
||||
join ${openaire_db_name}.result r1 on rel.source=r1.id
|
||||
|
@ -97,7 +97,7 @@ where relClass='Cites' and rel.datainfo.provenanceaction.classid = 'sysimport:cr
|
|||
and rel.datainfo.deletedbyinference=false and rel.datainfo.invisible = FALSE
|
||||
group by substr(target, 4);
|
||||
|
||||
create table ${stats_db_name}.result_references_oc stored as parquet as
|
||||
CREATE TABLE IF NOT EXISTS ${stats_db_name}.result_references_oc stored as parquet as
|
||||
select substr(source, 4) as id, count(distinct substr(target, 4)) as references
|
||||
from ${openaire_db_name}.relation rel
|
||||
join ${openaire_db_name}.result r1 on rel.source=r1.id
|
||||
|
|
|
@ -42,7 +42,7 @@ join ${stats_db_name}.result res on res.id=r.id;
|
|||
create table ${stats_db_name}.result_apc as
|
||||
select r.id, r.amount, r.currency
|
||||
from (
|
||||
select substr(r.id, 4) as id, inst.processingchargeamount.value as amount, inst.processingchargecurrency.value as currency
|
||||
select substr(r.id, 4) as id, cast(inst.processingchargeamount.value as float) as amount, inst.processingchargecurrency.value as currency
|
||||
from ${openaire_db_name}.result r lateral view explode(r.instance) instances as inst) r
|
||||
join ${stats_db_name}.result res on res.id=r.id
|
||||
where r.amount is not null;
|
||||
|
|
|
@ -454,16 +454,16 @@ FROM publication_datasources pd
|
|||
compute stats indi_pub_hybrid_oa_with_cc;
|
||||
|
||||
create table indi_pub_downloads stored as parquet as
|
||||
SELECT result_id, sum(downloads) no_dowloads from openaire_prod_usage_stats.usage_stats
|
||||
SELECT result_id, sum(downloads) no_downloads from openaire_prod_usage_stats.usage_stats
|
||||
join publication on result_id=id
|
||||
where downloads>0
|
||||
GROUP BY result_id
|
||||
order by no_dowloads desc;
|
||||
order by no_downloads desc;
|
||||
|
||||
compute stats indi_pub_downloads;
|
||||
|
||||
create table indi_pub_downloads_datasource stored as parquet as
|
||||
SELECT result_id, repository_id, sum(downloads) no_dowloads from openaire_prod_usage_stats.usage_stats
|
||||
SELECT result_id, repository_id, sum(downloads) no_downloads from openaire_prod_usage_stats.usage_stats
|
||||
join publication on result_id=id
|
||||
where downloads>0
|
||||
GROUP BY result_id, repository_id
|
||||
|
@ -472,7 +472,7 @@ order by result_id;
|
|||
compute stats indi_pub_downloads_datasource;
|
||||
|
||||
create table indi_pub_downloads_year stored as parquet as
|
||||
SELECT result_id, substring(us.`date`, 1,4) as `year`, sum(downloads) no_dowloads from openaire_prod_usage_stats.usage_stats us
|
||||
SELECT result_id, substring(us.`date`, 1,4) as `year`, sum(downloads) no_downloads from openaire_prod_usage_stats.usage_stats us
|
||||
join publication on result_id=id where downloads>0
|
||||
GROUP BY result_id, `year`
|
||||
order by `year` asc;
|
||||
|
@ -480,7 +480,7 @@ order by `year` asc;
|
|||
compute stats indi_pub_downloads_year;
|
||||
|
||||
create table indi_pub_downloads_datasource_year stored as parquet as
|
||||
SELECT result_id, substring(us.`date`, 1,4) as `year`, repository_id, sum(downloads) no_dowloads from openaire_prod_usage_stats.usage_stats us
|
||||
SELECT result_id, substring(us.`date`, 1,4) as `year`, repository_id, sum(downloads) no_downloads from openaire_prod_usage_stats.usage_stats us
|
||||
join publication on result_id=id
|
||||
where downloads>0
|
||||
GROUP BY result_id, repository_id, `year`
|
||||
|
|
|
@ -39,7 +39,6 @@ create table TARGET.result stored as parquet as
|
|||
'openorgs____::5f31346d444a7f06a28c880fb170b0f6', --Ghent University
|
||||
'openorgs____::2dbe47117fd5409f9c61620813456632', --University of Luxembourg
|
||||
'openorgs____::6445d7758d3a40c4d997953b6632a368', --National Institute of Informatics (NII)
|
||||
|
||||
'openorgs____::b77c01aa15de3675da34277d48de2ec1', -- Valencia Catholic University Saint Vincent Martyr
|
||||
'openorgs____::7fe2f66cdc43983c6b24816bfe9cf6a0', -- University of Warsaw
|
||||
'openorgs____::15e7921fc50d9aa1229a82a84429419e', -- University Of Thessaly
|
||||
|
@ -224,18 +223,3 @@ create table TARGET.indi_result_with_pid stored as parquet as select * from SOUR
|
|||
--create table TARGET.indi_software_gold_oa stored as parquet as select * from SOURCE.indi_software_gold_oa orig where exists (select 1 from TARGET.result r where r.id=orig.id);
|
||||
--compute stats TARGET.indi_software_gold_oa;
|
||||
|
||||
--denorm
|
||||
alter table TARGET.result rename to TARGET.res_tmp;
|
||||
|
||||
create table TARGET.result_denorm stored as parquet as
|
||||
select distinct r.*, rp.project, p.acronym as pacronym, p.title as ptitle, p.funder as pfunder, p.funding_lvl0 as pfunding_lvl0, rd.datasource, d.name as dname, d.type as dtype
|
||||
from TARGET.res_tmp r
|
||||
left outer join TARGET.result_projects rp on rp.id=r.id
|
||||
left outer join TARGET.result_datasources rd on rd.id=r.id
|
||||
left outer join TARGET.project p on p.id=rp.project
|
||||
left outer join TARGET.datasource d on d.id=rd.datasource;
|
||||
compute stats TARGET.result_denorm;
|
||||
|
||||
alter table TARGET.result_denorm rename to TARGET.result;
|
||||
drop table TARGET.res_tmp;
|
||||
--- done!
|
|
@ -48,7 +48,9 @@ CREATE TABLE ${stats_db_name}.project_tmp
|
|||
delayedpubs INT,
|
||||
callidentifier STRING,
|
||||
code STRING,
|
||||
totalcost FLOAT
|
||||
totalcost FLOAT,
|
||||
fundedamount FLOAT,
|
||||
currency STRING
|
||||
) CLUSTERED BY (id) INTO 100 buckets stored AS orc tblproperties ('transactional' = 'true');
|
||||
|
||||
INSERT INTO ${stats_db_name}.project_tmp
|
||||
|
@ -72,7 +74,9 @@ SELECT substr(p.id, 4) AS id,
|
|||
0 AS delayedpubs,
|
||||
p.callidentifier.value AS callidentifier,
|
||||
p.code.value AS code,
|
||||
p.totalcost AS totalcost
|
||||
p.totalcost AS totalcost,
|
||||
p.fundedamount AS fundedamount,
|
||||
p.currency.value AS currency
|
||||
FROM ${openaire_db_name}.project p
|
||||
WHERE p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
|
||||
|
||||
|
|
Loading…
Reference in New Issue