resolving conflicts

This commit is contained in:
Miriam Baglioni 2022-11-28 10:44:56 +01:00
commit 0628df7a3a
36 changed files with 1370 additions and 264 deletions

View File

@ -211,7 +211,7 @@ public class GraphCleaningFunctions extends CleaningFunctions {
.orElse(s.getValue()), .orElse(s.getValue()),
Function.identity(), Function.identity(),
(s1, s2) -> Collections (s1, s2) -> Collections
.min(Lists.newArrayList(s1, s1), new SubjectProvenanceComparator()))) .min(Lists.newArrayList(s1, s2), new SubjectProvenanceComparator())))
.values()); .values());
r.setSubject(subjects); r.setSubject(subjects);
} }

View File

@ -49,7 +49,7 @@ object DataciteToOAFTransformation {
/** This method should skip record if json contains invalid text /** This method should skip record if json contains invalid text
* defined in file datacite_filter * defined in file datacite_filter
* *
* @param record : unparsed datacite record * @param record : not parsed Datacite record
* @param json : parsed record * @param json : parsed record
* @return True if the record should be skipped * @return True if the record should be skipped
*/ */
@ -98,6 +98,10 @@ object DataciteToOAFTransformation {
} }
/** This utility method indicates whether the embargo date has been reached
* @param embargo_end_date
* @return True if the embargo date has been reached, false otherwise
*/
def embargo_end(embargo_end_date: String): Boolean = { def embargo_end(embargo_end_date: String): Boolean = {
val dt = LocalDate.parse(embargo_end_date, DateTimeFormatter.ofPattern("[yyyy-MM-dd]")) val dt = LocalDate.parse(embargo_end_date, DateTimeFormatter.ofPattern("[yyyy-MM-dd]"))
val td = LocalDate.now() val td = LocalDate.now()
@ -142,6 +146,21 @@ object DataciteToOAFTransformation {
} }
} }
/** *
* Use the vocabulary dnet:publication_resource to find a synonym to one of these terms and get the instance.type.
* Using the dnet:result_typologies vocabulary, we look up the instance.type synonym
* to generate one of the following main entities:
* - publication
* - dataset
* - software
* - otherresearchproduct
*
* @param resourceType
* @param resourceTypeGeneral
* @param schemaOrg
* @param vocabularies
* @return
*/
def getTypeQualifier( def getTypeQualifier(
resourceType: String, resourceType: String,
resourceTypeGeneral: String, resourceTypeGeneral: String,
@ -330,6 +349,7 @@ object DataciteToOAFTransformation {
if (result == null) if (result == null)
return List() return List()
// DOI is mapped on a PID inside an Instance object
val doi_q = OafMapperUtils.qualifier( val doi_q = OafMapperUtils.qualifier(
"doi", "doi",
"doi", "doi",
@ -338,6 +358,8 @@ object DataciteToOAFTransformation {
) )
val pid = OafMapperUtils.structuredProperty(doi, doi_q, dataInfo) val pid = OafMapperUtils.structuredProperty(doi, doi_q, dataInfo)
result.setPid(List(pid).asJava) result.setPid(List(pid).asJava)
// This identifier will be replaced at a later stage by the PID-based identifier generation logic
result.setId(OafMapperUtils.createOpenaireId(50, s"datacite____::$doi", true)) result.setId(OafMapperUtils.createOpenaireId(50, s"datacite____::$doi", true))
result.setOriginalId(List(doi).asJava) result.setOriginalId(List(doi).asJava)
@ -386,6 +408,10 @@ object DataciteToOAFTransformation {
a a
} }
if (authors == null || authors.isEmpty || !authors.exists(a => a != null))
return List()
result.setAuthor(authors.asJava)
val titles: List[TitleType] = (json \\ "titles").extractOrElse[List[TitleType]](List()) val titles: List[TitleType] = (json \\ "titles").extractOrElse[List[TitleType]](List())
result.setTitle( result.setTitle(
@ -409,10 +435,6 @@ object DataciteToOAFTransformation {
.asJava .asJava
) )
if (authors == null || authors.isEmpty || !authors.exists(a => a != null))
return List()
result.setAuthor(authors.asJava)
val dates = (json \\ "dates").extract[List[DateType]] val dates = (json \\ "dates").extract[List[DateType]]
val publication_year = (json \\ "publicationYear").extractOrElse[String](null) val publication_year = (json \\ "publicationYear").extractOrElse[String](null)

View File

@ -554,7 +554,7 @@ public class PublicationToOaf implements Serializable {
private KeyValue createCollectedFrom() { private KeyValue createCollectedFrom() {
KeyValue cf = new KeyValue(); KeyValue cf = new KeyValue();
cf.setValue(ModelConstants.ORCID.toUpperCase()); cf.setValue(ModelConstants.ORCID.toUpperCase());
cf.setKey("10|" + OPENAIRE_PREFIX + SEPARATOR + "806360c771262b4d6770e7cdf04b5c5a"); cf.setKey("10|" + OPENAIRE_PREFIX + SEPARATOR + "cd0f74b5955dc87fd0605745c4b49ee8");
return cf; return cf;
} }

View File

@ -134,13 +134,19 @@ public class ResultTagger implements Serializable {
/* Tagging for Advanced Constraints */ /* Tagging for Advanced Constraints */
final Set<String> aconstraints = new HashSet<>(); final Set<String> aconstraints = new HashSet<>();
conf.getSelectionConstraintsMap().keySet() conf
.forEach(communityId -> { .getSelectionConstraintsMap()
if(conf.getSelectionConstraintsMap().get(communityId) != null && .keySet()
conf.getSelectionConstraintsMap().get(communityId) .forEach(communityId -> {
.getCriteria().stream().anyMatch(crit -> crit.verifyCriteria(param))) if (conf.getSelectionConstraintsMap().get(communityId) != null &&
aconstraints.add(communityId); conf
}); .getSelectionConstraintsMap()
.get(communityId)
.getCriteria()
.stream()
.anyMatch(crit -> crit.verifyCriteria(param)))
aconstraints.add(communityId);
});
communities.addAll(aconstraints); communities.addAll(aconstraints);
@ -152,7 +158,7 @@ public class ResultTagger implements Serializable {
} }
result.getContext().forEach(c -> { result.getContext().forEach(c -> {
String cId = c.getId(); final String cId = c.getId();
if (communities.contains(cId)) { if (communities.contains(cId)) {
Optional<List<DataInfo>> opt_dataInfoList = Optional.ofNullable(c.getDataInfo()); Optional<List<DataInfo>> opt_dataInfoList = Optional.ofNullable(c.getDataInfo());
List<DataInfo> dataInfoList; List<DataInfo> dataInfoList;
@ -164,21 +170,48 @@ public class ResultTagger implements Serializable {
} }
if (subjects.contains(cId)) if (subjects.contains(cId))
dataInfoList dataInfoList
.add(OafMapperUtils.dataInfo(false, BULKTAG_DATA_INFO_TYPE, true, false, .add(
OafMapperUtils.qualifier(CLASS_ID_SUBJECT, CLASS_NAME_BULKTAG_SUBJECT, DNET_PROVENANCE_ACTIONS, DNET_PROVENANCE_ACTIONS), TAGGING_TRUST)); OafMapperUtils
.dataInfo(
false, BULKTAG_DATA_INFO_TYPE, true, false,
OafMapperUtils
.qualifier(
CLASS_ID_SUBJECT, CLASS_NAME_BULKTAG_SUBJECT, DNET_PROVENANCE_ACTIONS,
DNET_PROVENANCE_ACTIONS),
TAGGING_TRUST));
if (datasources.contains(cId)) if (datasources.contains(cId))
dataInfoList dataInfoList
.add(OafMapperUtils.dataInfo(false, BULKTAG_DATA_INFO_TYPE, true, false, .add(
OafMapperUtils.qualifier(CLASS_ID_DATASOURCE, CLASS_NAME_BULKTAG_DATASOURCE, DNET_PROVENANCE_ACTIONS, DNET_PROVENANCE_ACTIONS), TAGGING_TRUST)); OafMapperUtils
.dataInfo(
false, BULKTAG_DATA_INFO_TYPE, true, false,
OafMapperUtils
.qualifier(
CLASS_ID_DATASOURCE, CLASS_NAME_BULKTAG_DATASOURCE, DNET_PROVENANCE_ACTIONS,
DNET_PROVENANCE_ACTIONS),
TAGGING_TRUST));
if (czenodo.contains(cId)) if (czenodo.contains(cId))
dataInfoList dataInfoList
.add(OafMapperUtils.dataInfo(false, BULKTAG_DATA_INFO_TYPE, true, false, .add(
OafMapperUtils.qualifier(CLASS_ID_CZENODO, CLASS_NAME_BULKTAG_ZENODO, DNET_PROVENANCE_ACTIONS, DNET_PROVENANCE_ACTIONS), TAGGING_TRUST)); OafMapperUtils
.dataInfo(
false, BULKTAG_DATA_INFO_TYPE, true, false,
OafMapperUtils
.qualifier(
CLASS_ID_CZENODO, CLASS_NAME_BULKTAG_ZENODO, DNET_PROVENANCE_ACTIONS,
DNET_PROVENANCE_ACTIONS),
TAGGING_TRUST));
if (aconstraints.contains(cId)) if (aconstraints.contains(cId))
dataInfoList dataInfoList
.add( .add(
OafMapperUtils.dataInfo(false, BULKTAG_DATA_INFO_TYPE, true, false, OafMapperUtils
OafMapperUtils.qualifier(CLASS_ID_ADVANCED_CONSTRAINT, CLASS_NAME_BULKTAG_ADVANCED_CONSTRAINT, DNET_PROVENANCE_ACTIONS, DNET_PROVENANCE_ACTIONS), TAGGING_TRUST)); .dataInfo(
false, BULKTAG_DATA_INFO_TYPE, true, false,
OafMapperUtils
.qualifier(
CLASS_ID_ADVANCED_CONSTRAINT, CLASS_NAME_BULKTAG_ADVANCED_CONSTRAINT,
DNET_PROVENANCE_ACTIONS, DNET_PROVENANCE_ACTIONS),
TAGGING_TRUST));
} }
}); });
@ -199,21 +232,48 @@ public class ResultTagger implements Serializable {
List<DataInfo> dataInfoList = new ArrayList<>(); List<DataInfo> dataInfoList = new ArrayList<>();
if (subjects.contains(c)) if (subjects.contains(c))
dataInfoList dataInfoList
.add(OafMapperUtils.dataInfo(false, BULKTAG_DATA_INFO_TYPE, true, false, .add(
OafMapperUtils.qualifier(CLASS_ID_SUBJECT, CLASS_NAME_BULKTAG_SUBJECT, DNET_PROVENANCE_ACTIONS, DNET_PROVENANCE_ACTIONS), TAGGING_TRUST)); OafMapperUtils
.dataInfo(
false, BULKTAG_DATA_INFO_TYPE, true, false,
OafMapperUtils
.qualifier(
CLASS_ID_SUBJECT, CLASS_NAME_BULKTAG_SUBJECT, DNET_PROVENANCE_ACTIONS,
DNET_PROVENANCE_ACTIONS),
TAGGING_TRUST));
if (datasources.contains(c)) if (datasources.contains(c))
dataInfoList dataInfoList
.add(OafMapperUtils.dataInfo(false, BULKTAG_DATA_INFO_TYPE, true, false, .add(
OafMapperUtils.qualifier(CLASS_ID_DATASOURCE, CLASS_NAME_BULKTAG_DATASOURCE, DNET_PROVENANCE_ACTIONS, DNET_PROVENANCE_ACTIONS), TAGGING_TRUST)); OafMapperUtils
.dataInfo(
false, BULKTAG_DATA_INFO_TYPE, true, false,
OafMapperUtils
.qualifier(
CLASS_ID_DATASOURCE, CLASS_NAME_BULKTAG_DATASOURCE,
DNET_PROVENANCE_ACTIONS, DNET_PROVENANCE_ACTIONS),
TAGGING_TRUST));
if (czenodo.contains(c)) if (czenodo.contains(c))
dataInfoList dataInfoList
.add(OafMapperUtils.dataInfo(false, BULKTAG_DATA_INFO_TYPE, true, false, .add(
OafMapperUtils.qualifier(CLASS_ID_CZENODO, CLASS_NAME_BULKTAG_ZENODO, DNET_PROVENANCE_ACTIONS, DNET_PROVENANCE_ACTIONS), TAGGING_TRUST)); OafMapperUtils
.dataInfo(
false, BULKTAG_DATA_INFO_TYPE, true, false,
OafMapperUtils
.qualifier(
CLASS_ID_CZENODO, CLASS_NAME_BULKTAG_ZENODO, DNET_PROVENANCE_ACTIONS,
DNET_PROVENANCE_ACTIONS),
TAGGING_TRUST));
if (aconstraints.contains(c)) if (aconstraints.contains(c))
dataInfoList dataInfoList
.add( .add(
OafMapperUtils.dataInfo(false, BULKTAG_DATA_INFO_TYPE, true, false, OafMapperUtils
OafMapperUtils.qualifier(CLASS_ID_ADVANCED_CONSTRAINT, CLASS_NAME_BULKTAG_ADVANCED_CONSTRAINT, DNET_PROVENANCE_ACTIONS, DNET_PROVENANCE_ACTIONS), TAGGING_TRUST)); .dataInfo(
false, BULKTAG_DATA_INFO_TYPE, true, false,
OafMapperUtils
.qualifier(
CLASS_ID_ADVANCED_CONSTRAINT, CLASS_NAME_BULKTAG_ADVANCED_CONSTRAINT,
DNET_PROVENANCE_ACTIONS, DNET_PROVENANCE_ACTIONS),
TAGGING_TRUST));
context.setDataInfo(dataInfoList); context.setDataInfo(dataInfoList);
return context; return context;

View File

@ -3,7 +3,7 @@ package eu.dnetlib.dhp.bulktag.criteria;
import java.io.Serializable; import java.io.Serializable;
@VerbClass("contains_ignorecase") @VerbClass("contains_caseinsensitive")
public class ContainsVerbIgnoreCase implements Selection, Serializable { public class ContainsVerbIgnoreCase implements Selection, Serializable {
private String param; private String param;

View File

@ -3,7 +3,7 @@ package eu.dnetlib.dhp.bulktag.criteria;
import java.io.Serializable; import java.io.Serializable;
@VerbClass("equals_ignorecase") @VerbClass("equals_caseinsensitive")
public class EqualVerbIgnoreCase implements Selection, Serializable { public class EqualVerbIgnoreCase implements Selection, Serializable {
private String param; private String param;

View File

@ -3,7 +3,7 @@ package eu.dnetlib.dhp.bulktag.criteria;
import java.io.Serializable; import java.io.Serializable;
@VerbClass("not_contains_ignorecase") @VerbClass("not_contains_caseinsensitive")
public class NotContainsVerbIgnoreCase implements Selection, Serializable { public class NotContainsVerbIgnoreCase implements Selection, Serializable {
private String param; private String param;

View File

@ -3,7 +3,7 @@ package eu.dnetlib.dhp.bulktag.criteria;
import java.io.Serializable; import java.io.Serializable;
@VerbClass("not_equals_ignorecase") @VerbClass("not_equals_caseinsensitive")
public class NotEqualVerbIgnoreCase implements Selection, Serializable { public class NotEqualVerbIgnoreCase implements Selection, Serializable {
private String param; private String param;

View File

@ -1193,7 +1193,7 @@
<organizations/> <organizations/>
</community> </community>
<community id="science-innovation-policy"> <community id="science-innovation-policy">
<advancedConstraints>{"criteria":[{"constraint":[{"verb":"equals_ignorecase","field":"subject","value":"ciencias de la comunicación"}, <advancedConstraints>{"criteria":[{"constraint":[{"verb":"equals_caseinsensitive","field":"subject","value":"ciencias de la comunicación"},
{"verb":"equals","field":"subject","value":"Miriam"}]}, {"verb":"equals","field":"subject","value":"Miriam"}]},
{"constraint":[{"verb":"equals","field":"subject","value":"miriam"}]}]}</advancedConstraints> {"constraint":[{"verb":"equals","field":"subject","value":"miriam"}]}]}</advancedConstraints>
<subjects> <subjects>
@ -1317,81 +1317,81 @@
<datasources> <datasources>
<datasource> <datasource>
<openaireId>opendoar____::358aee4cc897452c00244351e4d91f69</openaireId> <openaireId>opendoar____::358aee4cc897452c00244351e4d91f69</openaireId>
<selcriteria>{"criteria":[{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"COVID-19"}]}, <selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]},
{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"SARS-CoV-2"}]}, {"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]},
{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"2019-nCoV"}}]} {"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCoV"}}]}
</selcriteria> </selcriteria>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>re3data_____::7b0ad08687b2c960d5aeef06f811d5e6</openaireId> <openaireId>re3data_____::7b0ad08687b2c960d5aeef06f811d5e6</openaireId>
<selcriteria>{"criteria":[{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"COVID-19"}]}, <selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]},
{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"SARS-CoV-2"}]}, {"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]},
{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"2019-nCoV"}]}]} {"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCoV"}]}]}
</selcriteria> </selcriteria>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>driver______::bee53aa31dc2cbb538c10c2b65fa5824</openaireId> <openaireId>driver______::bee53aa31dc2cbb538c10c2b65fa5824</openaireId>
<selcriteria>{"criteria":[{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"COVID-19"}]}, <selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]},
{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"SARS-CoV-2"}]}, {"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]},
{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"2019-nCoV"}]}]} {"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCoV"}]}]}
</selcriteria> </selcriteria>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>openaire____::437f4b072b1aa198adcbc35910ff3b98</openaireId> <openaireId>openaire____::437f4b072b1aa198adcbc35910ff3b98</openaireId>
<selcriteria>{"criteria":[{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"COVID-19"}]}, <selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]},
{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"SARS-CoV-2"}]}, {"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]},
{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"2019-nCoV"}]}]} {"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCoV"}]}]}
</selcriteria> </selcriteria>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>openaire____::081b82f96300b6a6e3d282bad31cb6e2</openaireId> <openaireId>openaire____::081b82f96300b6a6e3d282bad31cb6e2</openaireId>
<selcriteria>{"criteria":[{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"COVID-19"}]}, <selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]},
{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"SARS-CoV-2"}]}, {"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]},
{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"2019-nCoV"}]}]} {"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCoV"}]}]}
</selcriteria> </selcriteria>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>openaire____::9e3be59865b2c1c335d32dae2fe7b254</openaireId> <openaireId>openaire____::9e3be59865b2c1c335d32dae2fe7b254</openaireId>
<selcriteria>{"criteria":[{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"COVID-19"}]}, <selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]},
{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"SARS-CoV-2"}]}, {"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]},
{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"2019-nCoV"}]}]} {"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCoV"}]}]}
</selcriteria> </selcriteria>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>opendoar____::8b6dd7db9af49e67306feb59a8bdc52c</openaireId> <openaireId>opendoar____::8b6dd7db9af49e67306feb59a8bdc52c</openaireId>
<selcriteria>{"criteria":[{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"COVID-19"}]}, <selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]},
{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"SARS-CoV-2"}]}, {"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]},
{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"2019-nCoV"}]}]} {"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCoV"}]}]}
</selcriteria> </selcriteria>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>share_______::4719356ec8d7d55d3feb384ce879ad6c</openaireId> <openaireId>share_______::4719356ec8d7d55d3feb384ce879ad6c</openaireId>
<selcriteria>{"criteria":[{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"COVID-19"}]}, <selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]},
{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"SARS-CoV-2"}]}, {"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]},
{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"2019-nCoV"}]}]} {"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCoV"}]}]}
</selcriteria> </selcriteria>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>share_______::bbd802baad85d1fd440f32a7a3a2c2b1</openaireId> <openaireId>share_______::bbd802baad85d1fd440f32a7a3a2c2b1</openaireId>
<selcriteria>{"criteria":[{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"COVID-19"}]}, <selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]},
{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"SARS-CoV-2"}]}, {"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]},
{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"2019-nCoV"}]}]} {"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCoV"}]}]}
</selcriteria> </selcriteria>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>opendoar____::6f4922f45568161a8cdf4ad2299f6d23</openaireId> <openaireId>opendoar____::6f4922f45568161a8cdf4ad2299f6d23</openaireId>
<selcriteria>{"criteria":[{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"COVID-19"}]}, <selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]},
{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"SARS-CoV-2"}]}, {"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]},
{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"2019-nCoV"}]}]} {"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCoV"}]}]}
</selcriteria> </selcriteria>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>re3data_____::7980778c78fb4cf0fab13ce2159030dc</openaireId> <openaireId>re3data_____::7980778c78fb4cf0fab13ce2159030dc</openaireId>
<selcriteria>{"criteria":[{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"SARS-CoV-2"}]},{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"COVID-19"}]},{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"2019-nCov"}]}]}</selcriteria> <selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]},{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]},{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCov"}]}]}</selcriteria>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>re3data_____::978378def740bbf2bfb420de868c460b</openaireId> <openaireId>re3data_____::978378def740bbf2bfb420de868c460b</openaireId>
<selcriteria>{"criteria":[{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"SARS-CoV-2"}]},{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"COVID-19"}]},{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"2019-nCov"}]}]}</selcriteria> <selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]},{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]},{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCov"}]}]}</selcriteria>
</datasource> </datasource>
</datasources> </datasources>
<zenodocommunities> <zenodocommunities>

View File

@ -0,0 +1,25 @@
# Log4j configuration: declares a console appender named "stdout" that writes to
# System.out, and sets explicit levels for the Spark, Remoting, Jetty, Hadoop
# and Parquet loggers listed below.
# Root logger option
log4j.rootLogger=DEBUG, stdout
# Direct log messages to stdout
log4j.appender.stdout=org.apache.log4j.ConsoleAppender
log4j.appender.stdout.Target=System.out
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
# Line layout: timestamp, level, logger name (last component), line number, message
log4j.appender.stdout.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss} %-5p %c{1}:%L - %m%n
# Change this to set Spark log level
log4j.logger.org.apache.spark=ERROR
# NOTE(review): the root level is declared twice with different values
# (log4j.rootLogger=DEBUG above, log4j.rootCategory=WARN here) - confirm which
# one is intended and which one log4j actually honours.
log4j.rootCategory=WARN
# Silence akka remoting
log4j.logger.Remoting=WARN
# Ignore messages below warning level from Jetty, because it's a bit verbose
log4j.logger.org.eclipse.jetty=WARN
# Only warnings and above from the Hadoop output committers, Parquet writers and codec classes
log4j.logger.org.apache.hadoop.mapreduce.lib.output.FileOutputCommitterFactory=WARN
log4j.logger.org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter=WARN
log4j.logger.org.apache.parquet.hadoop.ParquetOutputFormat=WARN
log4j.logger.org.apache.parquet.hadoop.InternalParquetRecordWriter=WARN
log4j.logger.org.apache.hadoop.io.compress.CodecPool=WARN
log4j.logger.org.apache.parquet.hadoop.codec.CodecConfig=WARN

View File

@ -3,6 +3,7 @@ package eu.dnetlib.dhp.oa.graph.clean;
import java.io.Serializable; import java.io.Serializable;
import java.util.HashMap; import java.util.HashMap;
import java.util.Objects;
import java.util.concurrent.atomic.AtomicReference; import java.util.concurrent.atomic.AtomicReference;
import org.apache.commons.lang3.SerializationUtils; import org.apache.commons.lang3.SerializationUtils;
@ -10,6 +11,7 @@ import org.apache.commons.lang3.StringUtils;
import eu.dnetlib.dhp.common.FunctionalInterfaceSupport.SerializableConsumer; import eu.dnetlib.dhp.common.FunctionalInterfaceSupport.SerializableConsumer;
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
import eu.dnetlib.dhp.common.vocabulary.VocabularyTerm;
import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.*;
@ -31,29 +33,30 @@ public class CleaningRuleMap extends HashMap<Class<?>, SerializableConsumer<Obje
} }
private static void cleanSubject(VocabularyGroup vocabularies, Subject subject) { private static void cleanSubject(VocabularyGroup vocabularies, Subject subject) {
if (cleanSubjectForVocabulary(ModelConstants.DNET_SUBJECT_FOS_CLASSID, vocabularies, subject)) { cleanSubjectForVocabulary(ModelConstants.DNET_SUBJECT_FOS_CLASSID, vocabularies, subject);
return; // TODO cleaning based on different subject vocabs can be added here
} else {
// TODO cleaning based on different subject vocabs can be added here
}
} }
private static boolean cleanSubjectForVocabulary(String vocabularyId, VocabularyGroup vocabularies, private static void cleanSubjectForVocabulary(String vocabularyId, VocabularyGroup vocabularies,
Subject subject) { Subject subject) {
AtomicReference<Boolean> modified = new AtomicReference<>(false);
vocabularies.find(vocabularyId).ifPresent(vocabulary -> { vocabularies.find(vocabularyId).ifPresent(vocabulary -> {
if (!ModelConstants.DNET_SUBJECT_KEYWORD.equalsIgnoreCase(subject.getQualifier().getClassid())) { if (ModelConstants.DNET_SUBJECT_KEYWORD.equalsIgnoreCase(subject.getQualifier().getClassid())) {
return; Qualifier newValue = vocabulary.lookup(subject.getValue());
} if (!ModelConstants.UNKNOWN.equals(newValue.getClassid())) {
Qualifier newValue = vocabulary.lookup(subject.getValue()); subject.setValue(newValue.getClassid());
if (!ModelConstants.UNKNOWN.equals(newValue.getClassid())) { subject.getQualifier().setClassid(vocabularyId);
subject.setValue(newValue.getClassid()); subject.getQualifier().setClassname(vocabulary.getName());
subject.getQualifier().setClassid(vocabularyId); }
subject.getQualifier().setClassname(vocabulary.getName()); } else if (vocabularyId.equals(subject.getQualifier().getClassid())) {
modified.set(true); Qualifier syn = vocabulary.getSynonymAsQualifier(subject.getValue());
VocabularyTerm term = vocabulary.getTerm(subject.getValue());
if (Objects.isNull(syn) && Objects.isNull(term)) {
subject.getQualifier().setClassid(ModelConstants.DNET_SUBJECT_KEYWORD);
subject.getQualifier().setClassname(ModelConstants.DNET_SUBJECT_KEYWORD);
}
} }
}); });
return modified.get();
} }
private static void cleanRelation(VocabularyGroup vocabularies, Relation r) { private static void cleanRelation(VocabularyGroup vocabularies, Relation r) {

View File

@ -43,7 +43,7 @@ public class CleanCountrySparkJob implements Serializable {
String jsonConfiguration = IOUtils String jsonConfiguration = IOUtils
.toString( .toString(
CleanContextSparkJob.class CleanCountrySparkJob.class
.getResourceAsStream( .getResourceAsStream(
"/eu/dnetlib/dhp/oa/graph/input_clean_country_parameters.json")); "/eu/dnetlib/dhp/oa/graph/input_clean_country_parameters.json"));
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
@ -117,7 +117,7 @@ public class CleanCountrySparkJob implements Serializable {
p -> p p -> p
.getQualifier() .getQualifier()
.getClassid() .getClassid()
.equals(PidType.doi) && pidInParam(p.getValue(), verifyParam))) { .equals(PidType.doi.toString()) && pidInParam(p.getValue(), verifyParam))) {
r r
.setCountry( .setCountry(
r r

View File

@ -65,7 +65,6 @@ public class GetDatasourceFromCountry implements Serializable {
conf, conf,
isSparkSessionManaged, isSparkSessionManaged,
spark -> { spark -> {
getDatasourceFromCountry(spark, country, inputPath, workingPath); getDatasourceFromCountry(spark, country, inputPath, workingPath);
}); });
} }
@ -83,7 +82,6 @@ public class GetDatasourceFromCountry implements Serializable {
(FilterFunction<Organization>) o -> !o.getDataInfo().getDeletedbyinference() && (FilterFunction<Organization>) o -> !o.getDataInfo().getDeletedbyinference() &&
o.getCountry().getClassid().length() > 0 && o.getCountry().getClassid().length() > 0 &&
o.getCountry().getClassid().equals(country)); o.getCountry().getClassid().equals(country));
;
// filtering of the relations taking the non deleted by inference and those with IsProvidedBy as relclass // filtering of the relations taking the non deleted by inference and those with IsProvidedBy as relclass
Dataset<Relation> relation = spark Dataset<Relation> relation = spark
@ -97,7 +95,7 @@ public class GetDatasourceFromCountry implements Serializable {
!rel.getDataInfo().getDeletedbyinference()); !rel.getDataInfo().getDeletedbyinference());
organization organization
.joinWith(relation, organization.col("id").equalTo(relation.col("target")), "left") .joinWith(relation, organization.col("id").equalTo(relation.col("target")))
.map((MapFunction<Tuple2<Organization, Relation>, String>) t2 -> t2._2().getSource(), Encoders.STRING()) .map((MapFunction<Tuple2<Organization, Relation>, String>) t2 -> t2._2().getSource(), Encoders.STRING())
.write() .write()
.mode(SaveMode.Overwrite) .mode(SaveMode.Overwrite)

View File

@ -366,6 +366,7 @@ public abstract class AbstractMdRecordToOafMapper {
r.setInstance(instances); r.setInstance(instances);
r.setBestaccessright(OafMapperUtils.createBestAccessRights(instances)); r.setBestaccessright(OafMapperUtils.createBestAccessRights(instances));
r.setEoscifguidelines(prepareEOSCIfGuidelines(doc, info));
} }
protected abstract List<StructuredProperty> prepareResultPids(Document doc, DataInfo info); protected abstract List<StructuredProperty> prepareResultPids(Document doc, DataInfo info);
@ -384,6 +385,25 @@ public abstract class AbstractMdRecordToOafMapper {
return list; return list;
} }
/**
 * Builds the list of EOSC IF guidelines found in the given document.
 *
 * Every node matched by the XPath {@code //oaf:eoscifguidelines} yields one
 * {@link EoscIfGuidelines} populated from its {@code code}, {@code label},
 * {@code url} and {@code semanticrelation} attributes. Nodes whose
 * {@code code} attribute is blank are ignored.
 *
 * @param doc the document the guideline nodes are selected from
 * @param info not used by this method
 * @return the collected guidelines; the entries are gathered in a hash set before
 *         being copied to the list, so their order is not tied to document order
 *         (presumably duplicates collapse via EoscIfGuidelines equals/hashCode - TODO confirm)
 */
private List<EoscIfGuidelines> prepareEOSCIfGuidelines(Document doc, DataInfo info) {
	final Set<EoscIfGuidelines> guidelines = Sets.newHashSet();

	for (final Object item : doc.selectNodes("//oaf:eoscifguidelines")) {
		final Node node = (Node) item;
		final String code = node.valueOf("@code");

		// a guideline without a code is not usable: skip the node
		if (StringUtils.isBlank(code)) {
			continue;
		}

		final EoscIfGuidelines guideline = new EoscIfGuidelines();
		guideline.setCode(code);
		guideline.setLabel(node.valueOf("@label"));
		guideline.setUrl(node.valueOf("@url"));
		guideline.setSemanticRelation(node.valueOf("@semanticrelation"));
		guidelines.add(guideline);
	}

	return Lists.newArrayList(guidelines);
}
protected abstract Qualifier prepareResourceType(Document doc, DataInfo info); protected abstract Qualifier prepareResourceType(Document doc, DataInfo info);
protected abstract List<Instance> prepareInstances( protected abstract List<Instance> prepareInstances(

View File

@ -177,6 +177,9 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
for (final Object o : doc.selectNodes("//*[local-name()='identifier' and ./@identifierType='landingPage']")) { for (final Object o : doc.selectNodes("//*[local-name()='identifier' and ./@identifierType='landingPage']")) {
url.add(trimAndDecodeUrl(((Node) o).getText().trim())); url.add(trimAndDecodeUrl(((Node) o).getText().trim()));
} }
for (final Object o : doc.selectNodes("//*[local-name()='identifier' and ./@identifierType='w3id']")) {
url.add(trimAndDecodeUrl(((Node) o).getText().trim()));
}
Set<String> validUrl = validateUrl(url); Set<String> validUrl = validateUrl(url);

View File

@ -432,14 +432,14 @@
<error to="Kill"/> <error to="Kill"/>
</action> </action>
<join name="wait_clean_context" to="getHostedby"/> <join name="wait_clean_context" to="select_datasourceId_from_country"/>
<action name="getHostedby"> <action name="select_datasourceId_from_country">
<spark xmlns="uri:oozie:spark-action:0.2"> <spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master> <master>yarn</master>
<mode>cluster</mode> <mode>cluster</mode>
<name>Clean publications context</name> <name>Select datasource ID from country</name>
<class>eu.dnetlib.dhp.oa.graph.clean.country.GetDatasourceFromCountry</class> <class>eu.dnetlib.dhp.oa.graph.clean.country.GetDatasourceFromCountry</class>
<jar>dhp-graph-mapper-${projectVersion}.jar</jar> <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
<spark-opts> <spark-opts>
@ -471,7 +471,7 @@
<spark xmlns="uri:oozie:spark-action:0.2"> <spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master> <master>yarn</master>
<mode>cluster</mode> <mode>cluster</mode>
<name>Clean publications counmtry</name> <name>Clean publication country</name>
<class>eu.dnetlib.dhp.oa.graph.clean.country.CleanCountrySparkJob</class> <class>eu.dnetlib.dhp.oa.graph.clean.country.CleanCountrySparkJob</class>
<jar>dhp-graph-mapper-${projectVersion}.jar</jar> <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
<spark-opts> <spark-opts>
@ -489,10 +489,10 @@
<arg>--workingPath</arg><arg>${workingDir}/working/publication</arg> <arg>--workingPath</arg><arg>${workingDir}/working/publication</arg>
<arg>--country</arg><arg>${country}</arg> <arg>--country</arg><arg>${country}</arg>
<arg>--verifyParam</arg><arg>${verifyCountryParam}</arg> <arg>--verifyParam</arg><arg>${verifyCountryParam}</arg>
<arg>--datasourcePath</arg><arg>${workingDir}/working/hostedby</arg> <arg>--hostedBy</arg><arg>${workingDir}/working/hostedby</arg>
<arg>--collectedfrom</arg><arg>${collectedfrom}</arg> <arg>--collectedfrom</arg><arg>${collectedfrom}</arg>
</spark> </spark>
<ok to="wait_clean_context"/> <ok to="wait_clean_country"/>
<error to="Kill"/> <error to="Kill"/>
</action> </action>
@ -500,7 +500,7 @@
<spark xmlns="uri:oozie:spark-action:0.2"> <spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master> <master>yarn</master>
<mode>cluster</mode> <mode>cluster</mode>
<name>Clean datasets Country</name> <name>Clean dataset country</name>
<class>eu.dnetlib.dhp.oa.graph.clean.country.CleanCountrySparkJob</class> <class>eu.dnetlib.dhp.oa.graph.clean.country.CleanCountrySparkJob</class>
<jar>dhp-graph-mapper-${projectVersion}.jar</jar> <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
<spark-opts> <spark-opts>
@ -518,10 +518,10 @@
<arg>--workingPath</arg><arg>${workingDir}/working/dataset</arg> <arg>--workingPath</arg><arg>${workingDir}/working/dataset</arg>
<arg>--country</arg><arg>${country}</arg> <arg>--country</arg><arg>${country}</arg>
<arg>--verifyParam</arg><arg>${verifyCountryParam}</arg> <arg>--verifyParam</arg><arg>${verifyCountryParam}</arg>
<arg>--datasourcePath</arg><arg>${workingDir}/working/hostedby</arg> <arg>--hostedBy</arg><arg>${workingDir}/working/hostedby</arg>
<arg>--collectedfrom</arg><arg>${collectedfrom}</arg> <arg>--collectedfrom</arg><arg>${collectedfrom}</arg>
</spark> </spark>
<ok to="wait_clean_context"/> <ok to="wait_clean_country"/>
<error to="Kill"/> <error to="Kill"/>
</action> </action>
@ -529,7 +529,7 @@
<spark xmlns="uri:oozie:spark-action:0.2"> <spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master> <master>yarn</master>
<mode>cluster</mode> <mode>cluster</mode>
<name>Clean otherresearchproducts country</name> <name>Clean otherresearchproduct country</name>
<class>eu.dnetlib.dhp.oa.graph.clean.country.CleanCountrySparkJob</class> <class>eu.dnetlib.dhp.oa.graph.clean.country.CleanCountrySparkJob</class>
<jar>dhp-graph-mapper-${projectVersion}.jar</jar> <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
<spark-opts> <spark-opts>
@ -547,10 +547,10 @@
<arg>--workingPath</arg><arg>${workingDir}/working/otherresearchproduct</arg> <arg>--workingPath</arg><arg>${workingDir}/working/otherresearchproduct</arg>
<arg>--country</arg><arg>${country}</arg> <arg>--country</arg><arg>${country}</arg>
<arg>--verifyParam</arg><arg>${verifyCountryParam}</arg> <arg>--verifyParam</arg><arg>${verifyCountryParam}</arg>
<arg>--datasourcePath</arg><arg>${workingDir}/working/hostedby</arg> <arg>--hostedBy</arg><arg>${workingDir}/working/hostedby</arg>
<arg>--collectedfrom</arg><arg>${collectedfrom}</arg> <arg>--collectedfrom</arg><arg>${collectedfrom}</arg>
</spark> </spark>
<ok to="wait_clean_context"/> <ok to="wait_clean_country"/>
<error to="Kill"/> <error to="Kill"/>
</action> </action>
@ -558,7 +558,7 @@
<spark xmlns="uri:oozie:spark-action:0.2"> <spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master> <master>yarn</master>
<mode>cluster</mode> <mode>cluster</mode>
<name>Clean softwares country</name> <name>Clean software country</name>
<class>eu.dnetlib.dhp.oa.graph.clean.country.CleanCountrySparkJob</class> <class>eu.dnetlib.dhp.oa.graph.clean.country.CleanCountrySparkJob</class>
<jar>dhp-graph-mapper-${projectVersion}.jar</jar> <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
<spark-opts> <spark-opts>
@ -576,7 +576,7 @@
<arg>--workingPath</arg><arg>${workingDir}/working/software</arg> <arg>--workingPath</arg><arg>${workingDir}/working/software</arg>
<arg>--country</arg><arg>${country}</arg> <arg>--country</arg><arg>${country}</arg>
<arg>--verifyParam</arg><arg>${verifyCountryParam}</arg> <arg>--verifyParam</arg><arg>${verifyCountryParam}</arg>
<arg>--datasourcePath</arg><arg>${workingDir}/working/hostedby</arg> <arg>--hostedBy</arg><arg>${workingDir}/working/hostedby</arg>
<arg>--collectedfrom</arg><arg>${collectedfrom}</arg> <arg>--collectedfrom</arg><arg>${collectedfrom}</arg>
</spark> </spark>
<ok to="wait_clean_country"/> <ok to="wait_clean_country"/>

View File

@ -126,6 +126,7 @@
<arg>--hiveDbName</arg><arg>${hiveDbName}</arg> <arg>--hiveDbName</arg><arg>${hiveDbName}</arg>
<arg>--className</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg> <arg>--className</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
<arg>--hiveMetastoreUris</arg><arg>${hiveMetastoreUris}</arg> <arg>--hiveMetastoreUris</arg><arg>${hiveMetastoreUris}</arg>
<arg>--numPartitions</arg><arg>8000</arg>
</spark> </spark>
<ok to="join_import"/> <ok to="join_import"/>
<error to="Kill"/> <error to="Kill"/>
@ -152,6 +153,7 @@
<arg>--hiveDbName</arg><arg>${hiveDbName}</arg> <arg>--hiveDbName</arg><arg>${hiveDbName}</arg>
<arg>--className</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg> <arg>--className</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
<arg>--hiveMetastoreUris</arg><arg>${hiveMetastoreUris}</arg> <arg>--hiveMetastoreUris</arg><arg>${hiveMetastoreUris}</arg>
<arg>--numPartitions</arg><arg>4000</arg>
</spark> </spark>
<ok to="join_import"/> <ok to="join_import"/>
<error to="Kill"/> <error to="Kill"/>
@ -178,6 +180,7 @@
<arg>--hiveDbName</arg><arg>${hiveDbName}</arg> <arg>--hiveDbName</arg><arg>${hiveDbName}</arg>
<arg>--className</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg> <arg>--className</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
<arg>--hiveMetastoreUris</arg><arg>${hiveMetastoreUris}</arg> <arg>--hiveMetastoreUris</arg><arg>${hiveMetastoreUris}</arg>
<arg>--numPartitions</arg><arg>3000</arg>
</spark> </spark>
<ok to="join_import"/> <ok to="join_import"/>
<error to="Kill"/> <error to="Kill"/>
@ -204,6 +207,7 @@
<arg>--hiveDbName</arg><arg>${hiveDbName}</arg> <arg>--hiveDbName</arg><arg>${hiveDbName}</arg>
<arg>--className</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg> <arg>--className</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
<arg>--hiveMetastoreUris</arg><arg>${hiveMetastoreUris}</arg> <arg>--hiveMetastoreUris</arg><arg>${hiveMetastoreUris}</arg>
<arg>--numPartitions</arg><arg>300</arg>
</spark> </spark>
<ok to="join_import"/> <ok to="join_import"/>
<error to="Kill"/> <error to="Kill"/>
@ -230,6 +234,7 @@
<arg>--hiveDbName</arg><arg>${hiveDbName}</arg> <arg>--hiveDbName</arg><arg>${hiveDbName}</arg>
<arg>--className</arg><arg>eu.dnetlib.dhp.schema.oaf.Datasource</arg> <arg>--className</arg><arg>eu.dnetlib.dhp.schema.oaf.Datasource</arg>
<arg>--hiveMetastoreUris</arg><arg>${hiveMetastoreUris}</arg> <arg>--hiveMetastoreUris</arg><arg>${hiveMetastoreUris}</arg>
<arg>--numPartitions</arg><arg>100</arg>
</spark> </spark>
<ok to="join_import"/> <ok to="join_import"/>
<error to="Kill"/> <error to="Kill"/>
@ -256,6 +261,7 @@
<arg>--hiveDbName</arg><arg>${hiveDbName}</arg> <arg>--hiveDbName</arg><arg>${hiveDbName}</arg>
<arg>--className</arg><arg>eu.dnetlib.dhp.schema.oaf.Organization</arg> <arg>--className</arg><arg>eu.dnetlib.dhp.schema.oaf.Organization</arg>
<arg>--hiveMetastoreUris</arg><arg>${hiveMetastoreUris}</arg> <arg>--hiveMetastoreUris</arg><arg>${hiveMetastoreUris}</arg>
<arg>--numPartitions</arg><arg>400</arg>
</spark> </spark>
<ok to="join_import"/> <ok to="join_import"/>
<error to="Kill"/> <error to="Kill"/>
@ -309,6 +315,7 @@
<arg>--hiveDbName</arg><arg>${hiveDbName}</arg> <arg>--hiveDbName</arg><arg>${hiveDbName}</arg>
<arg>--className</arg><arg>eu.dnetlib.dhp.schema.oaf.Relation</arg> <arg>--className</arg><arg>eu.dnetlib.dhp.schema.oaf.Relation</arg>
<arg>--hiveMetastoreUris</arg><arg>${hiveMetastoreUris}</arg> <arg>--hiveMetastoreUris</arg><arg>${hiveMetastoreUris}</arg>
<arg>--numPartitions</arg><arg>10000</arg>
</spark> </spark>
<ok to="join_import"/> <ok to="join_import"/>
<error to="Kill"/> <error to="Kill"/>

View File

@ -116,54 +116,45 @@ object SparkConvertRDDtoDataset {
.map(s => mapper.readValue(s, classOf[Relation])) .map(s => mapper.readValue(s, classOf[Relation]))
.filter(r => r.getDataInfo != null && !r.getDataInfo.getDeletedbyinference) .filter(r => r.getDataInfo != null && !r.getDataInfo.getDeletedbyinference)
.filter(r => r.getSource.startsWith("50") && r.getTarget.startsWith("50")) .filter(r => r.getSource.startsWith("50") && r.getTarget.startsWith("50"))
.filter(r => filterRelations(subRelTypeFilter, relClassFilter, r)) .filter(r => filterRelations(r))
//filter OpenCitations relations //filter OpenCitations relations
.filter(r => // .filter(r =>
r.getDataInfo.getProvenanceaction != null && // r.getDataInfo.getProvenanceaction != null &&
!"sysimport:crosswalk:opencitations".equals(r.getDataInfo.getProvenanceaction.getClassid) // !"sysimport:crosswalk:opencitations".equals(r.getDataInfo.getProvenanceaction.getClassid)
) // )
spark.createDataset(rddRelation).as[Relation].write.mode(SaveMode.Overwrite).save(s"$relPath") spark.createDataset(rddRelation).as[Relation].write.mode(SaveMode.Overwrite).save(s"$relPath")
} }
private def filterRelations(subRelTypeFilter: String, relClassFilter: List[String], r: Relation): Boolean = { private def filterRelations(r: Relation): Boolean = {
if (StringUtils.isNotBlank(subRelTypeFilter)) {
subRelTypeFilter.equalsIgnoreCase(r.getSubRelType) /** *
} else { * We filter relation generated by dedups
!relClassFilter.exists(k => k.equalsIgnoreCase(r.getRelClass)) * and all the relation that have one single collectedFrom OpenCitation
*/
val relClassFilter = List(
ModelConstants.MERGES,
ModelConstants.IS_MERGED_IN,
ModelConstants.HAS_AMONG_TOP_N_SIMILAR_DOCS,
ModelConstants.IS_AMONG_TOP_N_SIMILAR_DOCS
)
if (relClassFilter.exists(k => k.equalsIgnoreCase(r.getRelClass)))
false
else {
if (r.getCollectedfrom == null || r.getCollectedfrom.size() == 0)
false
else if (r.getCollectedfrom.size() > 1)
true
else if (
r.getCollectedfrom.size() == 1 && r.getCollectedfrom.get(0) != null && "OpenCitations".equalsIgnoreCase(
r.getCollectedfrom.get(0).getValue
)
)
false
else
true
} }
} }
/*
//TODO: finalise implementation
private def processResult[T<: Result](
implicit ct: ClassTag[T],
log: Logger,
spark: SparkSession,
sourcePath: String,
entityPath: String,
clazz: Class[T]
): Unit = {
val entityType = clazz.getSimpleName.toLowerCase
log.info(s"Converting $entityType")
val mapper = new ObjectMapper() with ScalaObjectMapper
mapper.registerModule(DefaultScalaModule)
val rdd = spark.sparkContext
.textFile(s"$sourcePath/$entityType")
.map(s => mapper.readValue(s, clazz))
.filter(r => r.getDataInfo != null && !r.getDataInfo.getDeletedbyinference);
implicit val encoder: Encoder[T] = Encoders.kryo(clazz)
spark
.createDataset(rdd)
.as[T]
.write
.mode(SaveMode.Overwrite)
.save(s"$entityPath/$entityType")
}
*/
} }

View File

@ -278,6 +278,16 @@ public class GraphCleaningFunctionsTest {
s -> "0102 computer and information sciences".equals(s.getValue()) & s -> "0102 computer and information sciences".equals(s.getValue()) &
ModelConstants.DNET_SUBJECT_FOS_CLASSID.equals(s.getQualifier().getClassid()))); ModelConstants.DNET_SUBJECT_FOS_CLASSID.equals(s.getQualifier().getClassid())));
List<Subject> s1 = p_cleaned
.getSubject()
.stream()
.filter(s -> s.getValue().equals("In Situ Hybridization"))
.collect(Collectors.toList());
assertNotNull(s1);
assertEquals(1, s1.size());
assertEquals(ModelConstants.DNET_SUBJECT_KEYWORD, s1.get(0).getQualifier().getClassid());
assertEquals(ModelConstants.DNET_SUBJECT_KEYWORD, s1.get(0).getQualifier().getClassname());
// TODO add more assertions to verify the cleaned values // TODO add more assertions to verify the cleaned values
System.out.println(MAPPER.writeValueAsString(p_cleaned)); System.out.println(MAPPER.writeValueAsString(p_cleaned));
} }

View File

@ -936,11 +936,23 @@ class MappersTest {
System.out.println("***************"); System.out.println("***************");
System.out.println(new ObjectMapper().writeValueAsString(list)); System.out.println(new ObjectMapper().writeValueAsString(list));
System.out.println("***************"); System.out.println("***************");
// final OtherResearchProduct p = (OtherResearchProduct) list.get(0); assertEquals(5, list.size());
// assertValidId(p.getId()); final OtherResearchProduct p = (OtherResearchProduct) list.get(0);
// assertValidId(p.getCollectedfrom().get(0).getKey()); assertValidId(p.getId());
// System.out.println(p.getTitle().get(0).getValue()); assertTrue(p.getId().startsWith("50|w3id"));
// assertTrue(StringUtils.isNotBlank(p.getTitle().get(0).getValue())); assertValidId(p.getCollectedfrom().get(0).getKey());
assertTrue(StringUtils.isNotBlank(p.getTitle().get(0).getValue()));
assertEquals(1, p.getInstance().size());
assertEquals("https://w3id.org/ro-id/0ab171a7-45c5-4194-82d4-850955504bca", p.getPid().get(0).getValue());
Instance inst = p.getInstance().get(0);
assertEquals("https://w3id.org/ro-id/0ab171a7-45c5-4194-82d4-850955504bca", inst.getPid().get(0).getValue());
assertEquals("https://w3id.org/ro-id/0ab171a7-45c5-4194-82d4-850955504bca", inst.getUrl().get(0));
assertEquals(1, p.getEoscifguidelines().size());
assertEquals("EOSC::RO-crate", p.getEoscifguidelines().get(0).getCode());
assertEquals("EOSC::RO-crate", p.getEoscifguidelines().get(0).getLabel());
assertEquals("", p.getEoscifguidelines().get(0).getUrl());
assertEquals("compliesWith", p.getEoscifguidelines().get(0).getSemanticRelation());
} }
@Test @Test
@ -988,6 +1000,17 @@ class MappersTest {
} }
/**
 * Maps the record stored in {@code photic-zone-transformed.xml} with the ODF mapper and checks
 * that the first mapped entity exposes a non-null list of EOSC IF guidelines. The mapped entity
 * is also dumped as JSON on standard output.
 */
@Test
void testEOSCFuture_ROHub() throws IOException {
	final String separator = "***************";

	final String record = IOUtils
		.toString(Objects.requireNonNull(getClass().getResourceAsStream("photic-zone-transformed.xml")));
	final OdfToOafMapper mapper = new OdfToOafMapper(vocs, false, true);
	final List<Oaf> mapped = mapper.processMdRecord(record);

	// the first entity produced by the mapper is expected to be the research product itself
	final OtherResearchProduct product = (OtherResearchProduct) mapped.get(0);
	assertNotNull(product.getEoscifguidelines());

	final String json = new ObjectMapper().writeValueAsString(product);
	System.out.println(separator);
	System.out.println(json);
	System.out.println(separator);
}
@Test @Test
void testNotWellFormed() throws IOException { void testNotWellFormed() throws IOException {
final String xml = IOUtils final String xml = IOUtils

View File

@ -706,6 +706,28 @@
"source": [ "source": [
], ],
"subject": [ "subject": [
{
"dataInfo": {
"provenanceaction": {
"classid": "sysimport:crosswalk:repository",
"classname": "sysimport:crosswalk:repository",
"schemeid": "dnet:provenanceActions",
"schemename": "dnet:provenanceActions"
},
"deletedbyinference": false,
"inferred": false,
"inferenceprovenance": "",
"invisible": false,
"trust": "0.9"
},
"qualifier": {
"classid": "FOS",
"classname": "Fields of Science and Technology classification",
"schemeid": "dnet:result_subject",
"schemename": "dnet:result_subject"
},
"value": "In Situ Hybridization"
},
{ {
"dataInfo": { "dataInfo": {
"deletedbyinference": false, "deletedbyinference": false,

View File

@ -0,0 +1,108 @@
<?xml version="1.0" encoding="UTF-8"?>
<record xmlns:oaire="http://namespace.openaire.eu/schema/oaire/"
xmlns:datacite="http://datacite.org/schema/kernel-4"
xmlns:dr="http://www.driver-repository.eu/namespace/dr"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns:oaf="http://namespace.openaire.eu/oaf"
xmlns:oai="http://www.openarchives.org/OAI/2.0/"
xmlns:dri="http://www.driver-repository.eu/namespace/dri"
xmlns:xs="http://www.w3.org/2001/XMLSchema"
xmlns:dc="http://purl.org/dc/elements/1.1/">
<header xmlns="http://www.openarchives.org/OAI/2.0/">
<dri:objIdentifier>fsh_____4119::68126da991bd76d8be494bddfbf7a1bb</dri:objIdentifier>
<dri:recordIdentifier>https://w3id.org/ro-id/28499bdf-a0c6-46aa-a96f-50bd9490b8be</dri:recordIdentifier>
<dri:dateOfCollection/>
<dri:mdFormat/>
<dri:mdFormatInterpretation/>
<dri:repositoryId/>
<dr:objectIdentifier/>
<dr:dateOfCollection>2022-11-15T12:29:19Z</dr:dateOfCollection>
<dr:dateOfTransformation>2022-11-15T12:29:19Z</dr:dateOfTransformation>
<oaf:datasourceprefix>fsh_____4119</oaf:datasourceprefix>
<identifier>https://w3id.org/ro-id/28499bdf-a0c6-46aa-a96f-50bd9490b8be</identifier>
<datestamp>2022-11-15T12:29:19Z</datestamp>
<setSpec>rohub_data</setSpec>
<setSpec>ro-crate_data</setSpec>
</header>
<metadata>
<datacite:resource>
<datacite:identifier identifierType="w3id">https://w3id.org/ro-id/28499bdf-a0c6-46aa-a96f-50bd9490b8be</datacite:identifier>
<datacite:alternateIdentifiers>
<datacite:alternateIdentifier alternateIdentifierType="w3id">https://w3id.org/ro-id/28499bdf-a0c6-46aa-a96f-50bd9490b8be</datacite:alternateIdentifier>
</datacite:alternateIdentifiers>
<datacite:relatedIdentifiers>
<datacite:relatedIdentifier relatedIdentifierType="w3id" relationType="HasPart">https://w3id.org/ro-id/28499bdf-a0c6-46aa-a96f-50bd9490b8be/resources/b1b617b2-6b79-4bae-9fa6-b76945645626</datacite:relatedIdentifier>
<datacite:relatedIdentifier relatedIdentifierType="w3id" relationType="HasPart">https://w3id.org/ro-id/28499bdf-a0c6-46aa-a96f-50bd9490b8be/resources/78103994-30be-4875-bf89-5acd752b5c3d</datacite:relatedIdentifier>
<datacite:relatedIdentifier relatedIdentifierType="w3id" relationType="HasPart">https://w3id.org/ro-id/28499bdf-a0c6-46aa-a96f-50bd9490b8be/resources/18fd1c70-249b-4c67-80ee-539f801a0da7</datacite:relatedIdentifier>
<datacite:relatedIdentifier relatedIdentifierType="w3id" relationType="HasPart">https://w3id.org/ro-id/28499bdf-a0c6-46aa-a96f-50bd9490b8be/resources/32faa2eb-4cc8-401f-ac5c-bec2849b70e1</datacite:relatedIdentifier>
<datacite:relatedIdentifier relatedIdentifierType="w3id" relationType="HasPart">https://w3id.org/ro-id/28499bdf-a0c6-46aa-a96f-50bd9490b8be/resources/4c253f5a-d427-40c2-9e9f-6063ae087239</datacite:relatedIdentifier>
<datacite:relatedIdentifier relatedIdentifierType="w3id" relationType="HasPart">https://w3id.org/ro-id/28499bdf-a0c6-46aa-a96f-50bd9490b8be/resources/371b1957-078c-472b-a195-af7bce152c10</datacite:relatedIdentifier>
<datacite:relatedIdentifier relatedIdentifierType="w3id" relationType="HasPart">https://w3id.org/ro-id/28499bdf-a0c6-46aa-a96f-50bd9490b8be/resources/82f9e4b8-01b4-4e50-9e27-ec9d337c8d74</datacite:relatedIdentifier>
</datacite:relatedIdentifiers>
<datacite:resourceType xs:anyURI="http://purl.org/coar/resource_type/c_1843">RO-crate</datacite:resourceType>
<datacite:rightsList>
<datacite:rights rightsURI="https://creativecommons.org/licenses/by/4.0/legalcode">Creative Commons Attribution 4.0 International</datacite:rights>
<datacite:rights rightsURI="http://purl.org/coar/access_right/c_abf2">open access</datacite:rights>
</datacite:rightsList>
<datacite:titles>
<datacite:title>Mapping the photic zone of the Mediterranean Sea</datacite:title>
</datacite:titles>
<datacite:descriptions>
<datacite:description descriptionType="Abstract">Estimating the penetration of light along the water column from satellite data to map the photic zone in the Mediterranean Sea</datacite:description>
</datacite:descriptions>
<datacite:publisher>CNR-ISMAR</datacite:publisher>
<creators xmlns="http://datacite.org/schema/kernel-4">
<creator>
<creatorName>Giorgio Castellan</creatorName>
</creator>
<creator>
<creatorName>Lorenzo Angeletti</creatorName>
</creator>
<creator>
<creatorName>Paolo Montagna</creatorName>
</creator>
<creator>
<creatorName>Marco Taviani</creatorName>
</creator>
</creators>
<dates xmlns="http://datacite.org/schema/kernel-4">
<date dateType="Issued">2022-11-14T16:32:45Z</date>
</dates>
<dc:descriptions>
<dc:description descriptionType="Abstract">Estimating the penetration of light along the water column from satellite data to map the photic zone in the Mediterranean Sea</dc:description>
</dc:descriptions>
<dc:publicationYear>2022</dc:publicationYear>
<rightsList xmlns="http://datacite.org/schema/kernel-4">
<rights rightsURI="http://purl.org/coar/access_right/c_abf2">open access</rights>
</rightsList>
<sizes xmlns="http://datacite.org/schema/kernel-4">
<size>813.478 KB</size>
</sizes>
<subjects xmlns="http://datacite.org/schema/kernel-4">
<subject>Earth sciences</subject>
<subject>Ecology</subject>
<subject>Optics</subject>
</subjects>
</datacite:resource>
<oaf:identifier identifierType="w3id">https://w3id.org/ro-id/28499bdf-a0c6-46aa-a96f-50bd9490b8be</oaf:identifier>
<dr:CobjCategory type="other">0048</dr:CobjCategory>
<oaf:dateAccepted>2022-11-14</oaf:dateAccepted>
<oaf:accessrights>OPEN</oaf:accessrights>
<oaf:license>https://creativecommons.org/licenses/by/4.0/legalcode</oaf:license>
<oaf:language/>
<oaf:hostedBy name="ROHub" id="fairsharing_::4119"/>
<oaf:collectedFrom name="ROHub" id="fairsharing_::4119"/>
<oaf:eoscifguidelines code="EOSC::RO-crate"
label="EOSC::RO-crate"
url=""
semanticrelation="compliesWith"/>
<oaf:eoscifguidelines code="EOSC::Jupyter Notebook"
label="EOSC::Jupyter Notebook"
url=""
semanticrelation="compliesWith"/>
<oaf:eoscifguidelines code="EOSC::Data Cube"
label="EOSC::Data Cube"
url=""
semanticrelation="compliesWith"/>
</metadata>
</record>

View File

@ -21,15 +21,13 @@
</header> </header>
<metadata> <metadata>
<datacite:resource> <datacite:resource>
<datacite:identifier identifierType="URL">https://w3id.org/ro-id/0ab171a7-45c5-4194-82d4-850955504bca</datacite:identifier> <datacite:identifier identifierType="w3id">https://w3id.org/ro-id/0ab171a7-45c5-4194-82d4-850955504bca</datacite:identifier>
<datacite:alternateIdentifiers/> <datacite:alternateIdentifiers>
<datacite:alternateIdentifier alternateIdentifierType="w3id">https://w3id.org/ro-id/0ab171a7-45c5-4194-82d4-850955504bca</datacite:alternateIdentifier>
</datacite:alternateIdentifiers>
<datacite:relatedIdentifiers> <datacite:relatedIdentifiers>
<datacite:relatedIdentifier relatedIdentifierType="" relationType=""> <datacite:relatedIdentifier relatedIdentifierType="w3id" relationType="HasPart">https://w3id.org/ro-id/0ab171a7-45c5-4194-82d4-850955504bca/resources/24fae96f-f986-46e1-bfd0-a21ca20ff0ce</datacite:relatedIdentifier>
https://w3id.org/ro-id/0ab171a7-45c5-4194-82d4-850955504bca/resources/24fae96f-f986-46e1-bfd0-a21ca20ff0ce <datacite:relatedIdentifier relatedIdentifierType="w3id" relationType="HasPart">https://w3id.org/ro-id/0ab171a7-45c5-4194-82d4-850955504bca/resources/6d3427a8-352e-49f4-9796-f618c44dc16d</datacite:relatedIdentifier>
</datacite:relatedIdentifier>
<datacite:relatedIdentifier relatedIdentifierType="" relationType="">
https://w3id.org/ro-id/0ab171a7-45c5-4194-82d4-850955504bca/resources/6d3427a8-352e-49f4-9796-f618c44dc16d
</datacite:relatedIdentifier>
</datacite:relatedIdentifiers> </datacite:relatedIdentifiers>
<datacite:resourceType xs:anyURI="http://purl.org/coar/resource_type/c_1843">RO-crate</datacite:resourceType> <datacite:resourceType xs:anyURI="http://purl.org/coar/resource_type/c_1843">RO-crate</datacite:resourceType>
<datacite:rightsList> <datacite:rightsList>
@ -43,21 +41,17 @@
</datacite:descriptions> </datacite:descriptions>
<datacite:publisher>Poznań Supercomputing and Networking Center</datacite:publisher> <datacite:publisher>Poznań Supercomputing and Networking Center</datacite:publisher>
<contributors xmlns="http://datacite.org/schema/kernel-4"> <contributors xmlns="http://datacite.org/schema/kernel-4">
<contributor> <contributor contributorType="Researcher">
<contributor contributorType="Researcher"> <contributorName>Generation Service</contributorName>
<contributorName>Generation Service</contributorName>
</contributor>
</contributor> </contributor>
</contributors> </contributors>
<creators xmlns="http://datacite.org/schema/kernel-4"> <creators xmlns="http://datacite.org/schema/kernel-4">
<creator> <creator>
<creator> <creatorName>CNR-ISMAR</creatorName>
<creatorName>CNR-ISMAR</creatorName>
</creator>
</creator> </creator>
</creators> </creators>
<dates xmlns="http://datacite.org/schema/kernel-4"> <dates xmlns="http://datacite.org/schema/kernel-4">
<date dateType="Created">2018-06-20T11:21:46Z</date> <date dateType="Issued">2018-06-20T11:21:46Z</date>
</dates> </dates>
<dc:descriptions> <dc:descriptions>
<dc:description descriptionType="Abstract">The use of biological effects tools offer enormous potential to meet the challenges outlined by the European Union Marine Strategy Framework Directive (MSFD) whereby Member States are required to develop a robust set of tools for defining 11 qualitative descriptors of Good Environmental Status (GES), such as demonstrating that "Concentrations of contaminants are at levels not giving rise to pollution effects" (GES Descriptor 8). This paper discusses the combined approach of monitoring chemical contaminant levels, along side biological effect measurements relating to the effect of pollutants, for undertaking assessments of GES across European marine regions. We outline the minimum standards that biological effects tools should meet if they are to be used for defining GES in relation to Descriptor 8 and describe the current international initiatives underway to develop assessment criteria for these biological effects techniques. Crown Copyright (C) 2010 Published by Elsevier Ltd. All rights reserved.</dc:description> <dc:description descriptionType="Abstract">The use of biological effects tools offer enormous potential to meet the challenges outlined by the European Union Marine Strategy Framework Directive (MSFD) whereby Member States are required to develop a robust set of tools for defining 11 qualitative descriptors of Good Environmental Status (GES), such as demonstrating that "Concentrations of contaminants are at levels not giving rise to pollution effects" (GES Descriptor 8). This paper discusses the combined approach of monitoring chemical contaminant levels, along side biological effect measurements relating to the effect of pollutants, for undertaking assessments of GES across European marine regions. 
We outline the minimum standards that biological effects tools should meet if they are to be used for defining GES in relation to Descriptor 8 and describe the current international initiatives underway to develop assessment criteria for these biological effects techniques. Crown Copyright (C) 2010 Published by Elsevier Ltd. All rights reserved.</dc:description>
@ -71,15 +65,18 @@
</sizes> </sizes>
<subjects xmlns="http://datacite.org/schema/kernel-4"> <subjects xmlns="http://datacite.org/schema/kernel-4">
<subject>Ecology</subject> <subject>Ecology</subject>
<subject>EOSC::RO-crate</subject>
</subjects> </subjects>
</datacite:resource> </datacite:resource>
<oaf:identifier identifierType="URL">https://w3id.org/ro-id/0ab171a7-45c5-4194-82d4-850955504bca</oaf:identifier> <oaf:identifier identifierType="w3id">https://w3id.org/ro-id/0ab171a7-45c5-4194-82d4-850955504bca</oaf:identifier>
<dr:CobjCategory type="other research product">other research product</dr:CobjCategory> <dr:CobjCategory type="other research product">other research product</dr:CobjCategory>
<oaf:dateAccepted/> <oaf:dateAccepted>2018-06-20</oaf:dateAccepted>
<oaf:accessrights>OPEN</oaf:accessrights> <oaf:accessrights>OPEN</oaf:accessrights>
<oaf:language/> <oaf:language/>
<oaf:hostedBy name="ROHub" id="fairsharing_::4119"/> <oaf:hostedBy name="ROHub" id="fairsharing_::4119"/>
<oaf:collectedFrom name="ROHub" id="fairsharing_::4119"/> <oaf:collectedFrom name="ROHub" id="fairsharing_::4119"/>
<oaf:eoscifguidelines code="EOSC::RO-crate"
label="EOSC::RO-crate"
url=""
semanticrelation="compliesWith"/>
</metadata> </metadata>
</record> </record>

View File

@ -0,0 +1,88 @@
package eu.dnetlib.dhp.oa.provision;
import com.fasterxml.jackson.databind.DeserializationFeature;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.oa.provision.model.JoinedEntity;
import eu.dnetlib.dhp.oa.provision.utils.ContextMapper;
import eu.dnetlib.dhp.oa.provision.utils.StreamingInputDocumentFactory;
import eu.dnetlib.dhp.oa.provision.utils.XmlRecordFactory;
import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct;
import eu.dnetlib.dhp.utils.saxon.SaxonTransformerFactory;
import org.apache.commons.io.IOUtils;
import org.apache.solr.client.solrj.util.ClientUtils;
import org.apache.solr.common.SolrInputDocument;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.io.SAXReader;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import java.io.IOException;
import java.io.StringReader;
import static org.junit.jupiter.api.Assertions.assertNotNull;
/**
 * Checks that the EOSC Future record stored in {@code eosc-future/photic-zone.json} can be
 * serialised to an XML record and that this record can then be transformed into a Solr input
 * document.
 */
public class EOSCFuture_Test {

	/** JSON mapper that tolerates properties unknown to the target class. */
	public static ObjectMapper OBJECT_MAPPER = new ObjectMapper()
		.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);

	/** Version value handed to the {@link StreamingInputDocumentFactory}. */
	public static final String VERSION = "2021-04-15T10:05:53Z";

	/** Datasource identifier handed to the {@link StreamingInputDocumentFactory}. */
	public static final String DSID = "b9ee796a-c49f-4473-a708-e7d67b84c16d_SW5kZXhEU1Jlc291cmNlcy9JbmRleERTUmVzb3VyY2VUeXBl";

	private ContextMapper contextMapper;

	@BeforeEach
	public void setUp() {
		contextMapper = new ContextMapper();
	}

	/**
	 * Builds the XML record for the research product read from the JSON fixture, verifies that it
	 * parses as XML and feeds it to the index record transformation.
	 */
	@Test
	public void testEOSC_ROHub() throws IOException, DocumentException, TransformerException {
		// use the mapper prepared by setUp() rather than shadowing it with a second instance
		final XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false,
			XmlConverterJob.schemaLocation);

		final OtherResearchProduct p = OBJECT_MAPPER
			.readValue(readResource("eosc-future/photic-zone.json"), OtherResearchProduct.class);

		final String xml = xmlRecordFactory.build(new JoinedEntity<>(p));
		assertNotNull(xml);

		final Document doc = new SAXReader().read(new StringReader(xml));
		assertNotNull(doc);
		System.out.println(doc.asXML());

		testRecordTransformation(xml);
	}

	/**
	 * Applies the layout transformer (built from {@code fields.xml} and
	 * {@code layoutToRecordTransformer.xsl}) to the given record, parses the result into a Solr
	 * input document and prints its XML form.
	 *
	 * @param record the XML record to transform
	 */
	private void testRecordTransformation(final String record) throws IOException, TransformerException {
		final String fields = readResource("fields.xml");
		final String xslt = readResource("layoutToRecordTransformer.xsl");

		final String transformer = XmlIndexingJob.getLayoutTransformer("DMF", fields, xslt);
		final Transformer tr = SaxonTransformerFactory.newInstance(transformer);
		final String indexRecordXML = XmlIndexingJob.toIndexRecord(tr, record);

		final SolrInputDocument solrDoc = new StreamingInputDocumentFactory(VERSION, DSID)
			.parseDocument(indexRecordXML);

		final String xmlDoc = ClientUtils.toXML(solrDoc);
		Assertions.assertNotNull(xmlDoc);
		System.out.println(xmlDoc);
	}

	/**
	 * Reads a classpath resource, resolved relative to this class, as UTF-8 text.
	 * The test fails with an explicit message when the resource does not exist, and the
	 * underlying stream is always closed.
	 *
	 * @param name the resource name, as accepted by {@link Class#getResourceAsStream(String)}
	 * @return the content of the resource
	 */
	private String readResource(final String name) throws IOException {
		try (java.io.InputStream in = getClass().getResourceAsStream(name)) {
			assertNotNull(in, "missing test resource: " + name);
			// explicit charset: do not depend on the platform default encoding
			return IOUtils.toString(in, "UTF-8");
		}
	}
}

View File

@ -128,6 +128,20 @@ public class IndexRecordTransformerTest {
testRecordTransformation(record); testRecordTransformation(record);
} }
/**
 * Runs the record transformation on the {@code eosc-future/software-justthink.xml} test resource.
 */
@Test
public void testForEOSCFutureSoftwareNotebook() throws IOException, TransformerException {
	testRecordTransformation(
		IOUtils.toString(getClass().getResourceAsStream("eosc-future/software-justthink.xml")));
}
/**
 * Runs the record transformation on the {@code eosc-future/software-justthink-claim.xml} test resource.
 */
@Test
public void testForEOSCFutureSoftwareNotebookClaim() throws IOException, TransformerException {
	testRecordTransformation(
		IOUtils.toString(getClass().getResourceAsStream("eosc-future/software-justthink-claim.xml")));
}
@Test @Test
void testDoiUrlNormalization() throws MalformedURLException { void testDoiUrlNormalization() throws MalformedURLException {

View File

@ -0,0 +1,305 @@
<record>
<result xmlns:dri="http://www.driver-repository.eu/namespace/dri">
<header xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<dri:objIdentifier>od______2659::3801993ea8f970cfc991277160edf277</dri:objIdentifier>
<dri:dateOfCollection>2022-08-08T03:06:13Z</dri:dateOfCollection>
<status>under curation</status>
<counters/>
</header>
<metadata>
<oaf:entity xmlns:oaf="http://namespace.openaire.eu/oaf"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://namespace.openaire.eu/oaf https://www.openaire.eu/schema/1.0/oaf-1.0.xsd">
<oaf:result>
<title classid="main title" classname="main title"
schemeid="dnet:dataCite_title" schemename="dnet:dataCite_title">JUSThink
Alignment Analysis</title>
<creator rank="1" name="" surname="">Norman, Utku</creator>
<creator rank="2" name="" surname="">Dinkar, Tanvi</creator>
<creator rank="3" name="" surname="">Bruno, Barbara</creator>
<creator rank="4" name="" surname="">Clavel, Chloé</creator>
<dateofacceptance/>
<resulttype classid="software" classname="software"
schemeid="dnet:result_typologies" schemename="dnet:result_typologies"/>
<language classid="eng" classname="English" schemeid="dnet:languages"
schemename="dnet:languages"/>
<description>
<p>
<strong>1. Description</strong>
</p>
<p>This repository contains<strong> tools to automatically analyse how
participants align their use of task-specific referents in their
dialogue and actions for a collaborative learning activity, and how
it relates to the task success</strong> (i.e. their learning
outcomes and task performance).</p>
<p>As a use case, it processes data from a collaborative problem solving
activity named JUSThink <a
href="https://zenodo.org/record/4675070#references">[1, 2]</a>, i.e.
JUSThink Dialogue and Actions Corpus data set that is available from the
Zenodo Repository, DOI: <a href="http://doi.org/10.5281/zenodo.4627104"
>10.5281/zenodo.4627104</a>, and reproduces the results and figures
in <a href="https://zenodo.org/record/4675070#references">[3]</a>.</p>
<p>In brief: </p>
<ol>
<li><strong>JUSThink Dialogue and Actions Corpus</strong> contains
transcripts, event logs, and test responses of children aged 9
through 12, as they participate in the JUSThink activity <a
href="https://zenodo.org/record/4675070#references">[1, 2]</a>
in pairs of two, to solve a problem on graphs together. </li>
<li><strong>The JUSThink activity and its study</strong> is first
described in <a href="https://zenodo.org/record/4675070#references"
>[1]</a>, and elaborated with findings concerning the link
between children&#39;s learning, performance in the activity, and
perception of self, the other and the robot in <a
href="https://zenodo.org/record/4675070#references">[2]</a>. </li>
<li><strong>Alignment analysis in our work <a
href="https://zenodo.org/record/4675070#references"
>[3]</a></strong> studies the participants&#39; use of
expressions that are related to the task at hand, their follow up
actions of these expressions, and how it links to task success.</li>
</ol>
<p>
<strong>2. Publications</strong>
</p>
<p>If you use this work in an academic context, please cite the following
publications:</p>
<ul>
<li>
<p>Norman*, U., Dinkar*, T., Bruno, B., &amp; Clavel, C. (2022).
Studying Alignment in a Collaborative Learning Activity via
Automatic Methods: The Link Between What We Say and Do. Dialogue
&amp; Discourse, 13(2), 1 - ;48. *Contributed equally to this
work. <a href="https://doi.org/10.5210/dad.2022.201"
>https://doi.org/10.5210/dad.2022.201</a></p>
</li>
<li>
<p>Norman, U., Dinkar, T., Bruno, B., &amp; Clavel, C. (2021).
JUSThink Alignment Analysis. In Dialogue &amp; Discourse
(v1.0.0, Vol. 13, Number 2, pp. 1 - ;48). Zenodo. <a
href="https://doi.org/10.5281/zenodo.4675070"
>https://doi.org/10.5281/zenodo.4675070</a></p>
</li>
</ul>
<p>
<strong>3. Content</strong>
</p>
<p>The tools provided in this repository consists of 7 Jupyter Notebooks
written in Python 3, and two additional external tools utilised by the
notebooks.</p>
<p>
<strong>3.1. Jupyter Notebooks</strong>
</p>
<p>We highlight that the notebooks up until the last (i.e. to test the
hypotheses (tools/7_test_the_hypotheses.ipynb)) present a general
pipeline to process event logs, test responses and transcripts to
extract measures of task performance, learning outcomes, and measures of
alignment.</p>
<ol>
<li><strong>Extract task performance (and other features) from the logs
</strong>(tools/1_extract_performance_and_other_features_from_logs.ipynb):
Extracts various measures of task behaviour from the logs, at
varying granularities of the activity (i.e. the whole corpus, task,
attempt, and turn levels). In later notebooks, we focus on one of
the features to estimate the task performance of a team: (minimum)
error.</li>
<li><strong>Extract learning outcomes from the test responses</strong>
(tools/2_extract_learning_gain_from_test_responses.ipynb): Extracts
measures of learning outcomes from the responses to the pre-test and
the post-test. In later notebooks, we focus on one of the features
to estimate the learning outcome of a team: relative learning gain
<a href="https://sandbox.zenodo.org/record/742549#references"
>[4]</a></li>
<li><strong>Select and visualise a subset of teams for
transcription</strong>
(tools/3_visualise_transcribed_teams.ipynb): Visualises the
transcribed teams among the other teams in the feature space spanned
by task performance and learning outcome, as well as the
distribution of their number of attempts and turns.</li>
<li><strong>Extract routines from transcripts</strong>
(tools/4_extract_routines_from_transcripts.ipynb) (uses <a
href="https://github.com/GuillaumeDD/dialign">dialign</a> to
extract routines): Extracts routines of referring expressions that
are &quot;fixed&quot;, i.e. become shared or established amongst
interlocutors.</li>
<li><strong>Combine transcripts with logs</strong>
(tools/5_construct_the_corpus_by_combining_transcripts_with_logs.ipynb):
Merges transcripts with event logs to have a combined dialogue and
actions corpus, to be processed e.g. to detect follow-up
actions.</li>
<li><strong>Recognise instructions and detect follow-up actions</strong>
(tools/6_recognise_instructions_detect_follow-up_actions.ipynb):
Extracts verbalised instruction such as &quot;connect Mount Basel to
Montreux&quot;, and pairs them with the follow-up action that may
<em>match</em> (e.g. if the other connects Basel to Montreux) or
<em>mismatch</em> (e.g. if the other connects Basel to
Neuchatel) with the instruction.</li>
<li><strong>Test the hypotheses </strong>in <a
href="https://sandbox.zenodo.org/record/742549#references"
>[3]</a> (tools/7_test_the_hypotheses.ipynb) (uses
<strong>effsize</strong> to estimate effect size, specifically
Cliff&#39;s Delta): Considers each research questions and hypotheses
studied in <a
href="https://sandbox.zenodo.org/record/742549#references"
>[3]</a> and generates the results in <a
href="https://sandbox.zenodo.org/record/742549#references"
>[3]</a>.</li>
</ol>
<p>
<strong>3.2. External Tools</strong>
</p>
<ol>
<li><strong><a href="https://github.com/GuillaumeDD/dialign">dialign</a>
tool</strong> to extract routines, specifically <a
href="https://github.com/GuillaumeDD/dialign/releases/tag/v1.0"
>Release 1.0</a> from <a
href="https://github.com/GuillaumeDD/dialign/releases/download/v1.0/dialign-1.0.zip"
>dialign-1.0.zip</a>:\n It extracts routine expressions that are
&quot;shared&quot; among the participants from transcripts. \n It is
used as an external module (in accordance with its CeCILL-B License,
see <strong>License</strong>).</li>
<li><strong>effsize tool</strong> to compute estimators of effect
size.\n We specifically use it to compute Cliff&#39;s Delta, which
quantifies the amount difference between two groups of observations,
by computing the Cliff&#39;s Delta statistic.\n It is taken from
project <a
href="https://acclab.github.io/DABEST-python-docs/index.html"
>DABEST</a> (see <strong>License</strong>).</li>
</ol>
<p>
<strong>4. Research Questions and Hypotheses in <a
href="https://sandbox.zenodo.org/record/742549#references"
>[3]</a></strong>
</p>
<ul>
<li><strong>RQ1 Lexical alignment</strong>: How do the interlocutors
<em>use</em> expressions related to the task? Is this associated
with task success? <ul>
<li><strong>H1.1</strong>: Task-specific referents become
routine early for more successful teams.</li>
<li><strong>H1.2</strong>: Hesitation phenomena are more likely
to occur in the vicinity of priming and establishment of
task-specific referents for more successful teams.</li>
</ul>
</li>
<li><strong>RQ2 Behavioural alignment</strong>: How do the interlocutors
<em>follow up</em> these expressions with actions? Is this
associated with task success? <ul>
<li><strong>H2.1</strong>: Instructions are more likely to be
followed by a corresponding action early in the dialogue for
more successful teams.</li>
<li><strong>H2.2</strong>: When instructions are followed by a
corresponding or a different action, the action is more
likely to be in the vicinity of information management
phenomena for more successful teams.</li>
</ul>
</li>
</ul>
<p>The RQs and Hs are addressed in the notebook for testing the hypotheses
(i.e. tools/7_test_the_hypotheses.ipynb).</p>
<p>
<strong>Acknowledgements</strong>
</p>
<p>This project has received funding from the European Union&#39;s Horizon
2020 research and innovation programme under grant agreement No 765955.
Namely, the <a href="https://www.animatas.eu/">ANIMATAS Project</a>.</p>
<p>
<strong>License</strong>
</p>
<p>The whole package is under MIT License, see the <strong>LICENSE</strong>
file.</p>
<p>Classes under the <strong>tools/effsize</strong> package were taken from
project <a href="https://acclab.github.io/DABEST-python-docs/index.html"
><strong>DABEST</strong></a>, Copyright 2016-2020 Joses W. Ho.
These classes are licensed under the BSD 3-Clause Clear License. See
<strong>tools/effsize/LICENSE</strong> file for additional
details.</p>
<p>Classes under the <strong>tools/dialign-1.0</strong> package were taken
from project <strong><a href="https://github.com/GuillaumeDD/dialign"
>dialign</a></strong>. These classes are licensed under the
CeCILL-B License. This package is used as an &quot;external
module&quot;, see<strong> tools/dialign-1.0/LICENSE.txt</strong> for
additional details.</p>
</description>
<country classid="" classname="" schemeid="" schemename=""/>
<subject classid="" classname="" schemeid="" schemename=""/>
<relevantdate classid="" classname="" schemeid="" schemename=""/>
<publisher>Zenodo</publisher>
<embargoenddate/>
<journal issn="" eissn="" lissn="" ep="" iss="" sp="" vol=""/>
<source/>
<fulltext/>
<format/>
<storagedate/>
<resourcetype classid="" classname="" schemeid="" schemename=""/>
<device/>
<size/>
<version/>
<lastmetadataupdate/>
<metadataversionnumber/>
<documentationUrl/>
<codeRepositoryUrl/>
<programmingLanguage classid="" classname="" schemeid="" schemename=""/>
<contactperson/>
<contactgroup/>
<tool/>
<originalId>oai:zenodo.org:4675070</originalId>
<collectedfrom name="ZENODO" id="opendoar____::358aee4cc897452c00244351e4d91f69"/>
<pid classid="oai" classname="Open Archives Initiative"
schemeid="dnet:pid_types" schemename="dnet:pid_types"
>oai:zenodo.org:4675070</pid>
<pid classid="doi" classname="Digital Object Identifier"
schemeid="dnet:pid_types" schemename="dnet:pid_types"
>10.5281/zenodo.4675070</pid>
<bestaccessright classid="OPEN" classname="Open Access"
schemeid="dnet:access_modes" schemename="dnet:access_modes"/>
<eoscifguidelines code="EOSC::Jupyter Notebook" label="EOSC::Jupyter Notebook"
url="" semanticrelation="compliesWith"/>
<datainfo>
<inferred>false</inferred>
<deletedbyinference>false</deletedbyinference>
<trust>0.9</trust>
<inferenceprovenance/>
<provenanceaction classid="user:insert" classname="user:insert"
schemeid="dnet:provenanceActions" schemename="dnet:provenanceActions"/>
</datainfo>
<rels>
<rel inferred="false" trust="0.9" inferenceprovenance=""
provenanceaction="user:claim">
<to class="isProducedBy" scheme="dnet:result_project_relations"
type="project">corda__h2020::c4515ebef538a734cf11f795347f5dac</to>
<code>765955</code>
<acronym>ANIMATAS</acronym>
<title>Advancing intuitive human-machine interaction with human-like
social capabilities for education in schools</title>
<contracttype classid="" classname="" schemeid="" schemename=""/>
<funding>
<funder id="ec__________::EC" shortname="EC"
name="European Commission" jurisdiction=""/>
<funding_level_0 name="H2020"
>ec__________::EC::H2020</funding_level_0>
</funding>
<websiteurl/>
</rel>
</rels>
<children>
<instance id="od______2659::3801993ea8f970cfc991277160edf277">
<instancetype classid="0029" classname="Software"
schemeid="dnet:publication_resource"
schemename="dnet:publication_resource"/>
<collectedfrom name="ZENODO"
id="opendoar____::358aee4cc897452c00244351e4d91f69"/>
<hostedby name="ZENODO"
id="opendoar____::358aee4cc897452c00244351e4d91f69"/>
<accessright classid="OPEN" classname="Open Access"
schemeid="dnet:access_modes" schemename="dnet:access_modes"/>
<dateofacceptance/>
<webresource>
<url>https://zenodo.org/record/4675070</url>
</webresource>
</instance>
</children>
</oaf:result>
</oaf:entity>
</metadata>
</result>
</record>

View File

@ -0,0 +1,429 @@
<record>
<result xmlns:dri="http://www.driver-repository.eu/namespace/dri">
<header xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<dri:objIdentifier>doi_dedup___::c054151b6a8c4f41c7acf160651a6503</dri:objIdentifier>
<dri:dateOfCollection>2022-10-13T00:15:44+0000</dri:dateOfCollection>
<dri:dateOfTransformation>2022-10-13T07:44:29.152Z</dri:dateOfTransformation>
</header>
<metadata>
<oaf:entity xmlns:oaf="http://namespace.openaire.eu/oaf"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://namespace.openaire.eu/oaf https://www.openaire.eu/schema/1.0/oaf-1.0.xsd">
<oaf:result>
<collectedfrom name="ZENODO" id="opendoar____::358aee4cc897452c00244351e4d91f69"/>
<collectedfrom name="Datacite" id="openaire____::9e3be59865b2c1c335d32dae2fe7b254"/>
<originalId>oai:zenodo.org:4675070</originalId>
<originalId>50|od______2659::3801993ea8f970cfc991277160edf277</originalId>
<originalId>oai:zenodo.org:6974562</originalId>
<originalId>50|od______2659::9c87ff4a5e7710052b873088e7265072</originalId>
<originalId>10.5281/zenodo.4675069</originalId>
<pid classid="doi" classname="Digital Object Identifier" schemeid="dnet:pid_types"
schemename="dnet:pid_types" inferred="false"
provenanceaction="sysimport:crosswalk:repository" trust="0.9"
>10.5281/zenodo.4675070</pid>
<pid classid="doi" classname="Digital Object Identifier" schemeid="dnet:pid_types"
schemename="dnet:pid_types" inferred="false"
provenanceaction="sysimport:crosswalk:repository" trust="0.9"
>10.5281/zenodo.6974562</pid>
<pid classid="doi" classname="Digital Object Identifier" schemeid="dnet:pid_types"
schemename="dnet:pid_types" inferred="false"
provenanceaction="sysimport:actionset" trust="0.9">10.5281/zenodo.4675069</pid>
<measure id="influence" score="4.916186E-9" class="C5"/>
<measure id="popularity" score="6.885733E-9" class="C5"/>
<measure id="influence_alt" score="0" class="C5"/>
<measure id="popularity_alt" score="0.0" class="C5"/>
<measure id="impulse" score="0" class="C5"/>
<title classid="main title" classname="main title" schemeid="dnet:dataCite_title"
schemename="dnet:dataCite_title" inferred="false"
provenanceaction="sysimport:crosswalk:repository" trust="0.9">JUSThink Alignment
Analysis</title>
<bestaccessright classid="OPEN" classname="Open Access" schemeid="dnet:access_modes"
schemename="dnet:access_modes"/>
<creator rank="1" name="Utku" surname="Norman" orcid_pending="0000-0002-6802-1444"
>Norman, Utku</creator>
<creator rank="2" name="Tanvi" surname="Dinkar">Dinkar, Tanvi</creator>
<creator rank="3" name="Barbara" surname="Bruno" orcid_pending="0000-0003-0953-7173"
>Bruno, Barbara</creator>
<creator rank="4" name="Chloé" surname="Clavel" orcid_pending="0000-0003-4850-3398"
>Clavel, Chloé</creator>
<dateofacceptance>2022-08-08</dateofacceptance>
<description>&amp;lt;strong>1. Description&amp;lt;/strong> This repository
contains&amp;lt;strong> tools to automatically analyse how participants align
their use of task-specific referents in their dialogue and actions for a
collaborative learning activity, and how it relates to the task
success&amp;lt;/strong> (i.e. their learning outcomes and task performance). As
a use case, it processes data from a collaborative problem solving activity
named JUSThink [1, 2], i.e. JUSThink Dialogue and Actions Corpus data set that
is available from the Zenodo Repository, DOI: 10.5281/zenodo.4627104, and
reproduces the results and figures in [3]. In brief: &amp;lt;strong>JUSThink
Dialogue and Actions Corpus&amp;lt;/strong> contains transcripts, event logs,
and test responses of children aged 9 through 12, as they participate in the
JUSThink activity [1, 2] in pairs of two, to solve a problem on graphs together.
&amp;lt;strong>The JUSThink activity and its study&amp;lt;/strong> is first
described in [1], and elaborated with findings concerning the link between
children's learning, performance in the activity, and perception of self, the
other and the robot in [2]. &amp;lt;strong>Alignment analysis in our work
[3]&amp;lt;/strong> studies the participants' use of expressions that are
related to the task at hand, their follow up actions of these expressions, and
how it links to task success. &amp;lt;strong>Changes in Release
v1.1.0:&amp;lt;/strong> updated with the publication information, finalized
paper structure, research questions and hypotheses as in the published article:
U. Norman*&amp;lt;em>, &amp;lt;/em>T. Dinkar*, B. Bruno, and C. Clavel,
"Studying Alignment in a Collaborative Learning Activity via Automatic Methods:
The Link Between What We Say and Do," Dialogue &amp;amp;amp; Discourse, 13(2),
148. *Contributed equally to this work. 10.5210/dad.2022.201.
&amp;lt;strong>Full Changelog:&amp;lt;/strong>
https://github.com/chili-epfl/justhink-alignment-analysis/compare/v1.0.0...v1.1.0
&amp;lt;strong>2. Publications&amp;lt;/strong> If you use this work in an
academic context, please cite the following publications: Norman*, U., Dinkar*,
T., Bruno, B., &amp;amp;amp; Clavel, C. (2022). Studying Alignment in a
Collaborative Learning Activity via Automatic Methods: The Link Between What We
Say and Do. Dialogue &amp;amp;amp; Discourse, 13(2), 148. *Contributed equally
to this work. https://doi.org/10.5210/dad.2022.201 Norman, U., Dinkar, T.,
Bruno, B., &amp;amp;amp; Clavel, C. (2021). JUSThink Alignment Analysis. In
Dialogue &amp;amp;amp; Discourse (v1.1.0, Vol. 13, Number 2, pp. 148). Zenodo.
https://doi.org/10.5281/zenodo.6974562 &amp;lt;strong>3. Content&amp;lt;/strong>
The tools provided in this repository consists of 7 Jupyter Notebooks written in
Python 3, and two additional external tools utilised by the notebooks.
&amp;lt;strong>3.1. Jupyter Notebooks&amp;lt;/strong> We highlight that the
notebooks up until the last (i.e. to test the hypotheses
(tools/7_test_the_hypotheses.ipynb)) present a general pipeline to process event
logs, test responses and transcripts to extract measures of task performance,
learning outcomes, and measures of alignment. &amp;lt;strong>Extract task
performance (and other features) from the logs
&amp;lt;/strong>(tools/1_extract_performance_and_other_features_from_logs.ipynb):
Extracts various measures of task behaviour from the logs, at varying
granularities of the activity (i.e. the whole corpus, task, attempt, and turn
levels). In later notebooks, we focus on one of the features to estimate the
task performance of a team: (minimum) error. &amp;lt;strong>Extract learning
outcomes from the test responses&amp;lt;/strong>
(tools/2_extract_learning_gain_from_test_responses.ipynb): Extracts measures of
learning outcomes from the responses to the pre-test and the post-test. In later
notebooks, we focus on one of the features to estimate the learning outcome of a
team: relative learning gain [4] &amp;lt;strong>Select and visualise a subset of
teams for transcription&amp;lt;/strong>
(tools/3_visualise_transcribed_teams.ipynb): Visualises the transcribed teams
among the other teams in the feature space spanned by task performance and
learning outcome, as well as the distribution of their number of attempts and
turns. &amp;lt;strong>Extract routines from transcripts&amp;lt;/strong>
(tools/4_extract_routines_from_transcripts.ipynb) (uses dialign to extract
routines): Extracts routines of referring expressions that are "fixed", i.e.
become shared or established amongst interlocutors. &amp;lt;strong>Combine
transcripts with logs&amp;lt;/strong>
(tools/5_construct_the_corpus_by_combining_transcripts_with_logs.ipynb): Merges
transcripts with event logs to have a combined dialogue and actions corpus, to
be processed e.g. to detect follow-up actions. &amp;lt;strong>Recognise
instructions and detect follow-up actions&amp;lt;/strong>
(tools/6_recognise_instructions_detect_follow-up_actions.ipynb): Extracts
verbalised instruction such as "connect Mount Basel to Montreux", and pairs them
with the follow-up action that may &amp;lt;em>match&amp;lt;/em> (e.g. if the
other connects Basel to Montreux) or &amp;lt;em>mismatch&amp;lt;/em> (e.g. if
the other connects Basel to Neuchatel) with the instruction. &amp;lt;strong>Test
the hypotheses &amp;lt;/strong>in [3] (tools/7_test_the_hypotheses.ipynb) (uses
&amp;lt;strong>effsize&amp;lt;/strong> to estimate effect size, specifically
Cliff's Delta): Considers each research questions and hypotheses studied in [3]
and generates the results in [3]. &amp;lt;strong>3.2. External
Tools&amp;lt;/strong> &amp;lt;strong>dialign tool&amp;lt;/strong> to extract
routines, specifically Release 1.0 from dialign-1.0.zip:&amp;lt;br> It extracts
routine expressions that are "shared" among the participants from transcripts.
&amp;lt;br> It is used as an external module (in accordance with its CeCILL-B
License, see &amp;lt;strong>License&amp;lt;/strong>). &amp;lt;strong>effsize
tool&amp;lt;/strong> to compute estimators of effect size.&amp;lt;br> We
specifically use it to compute Cliff's Delta, which quantifies the amount
difference between two groups of observations, by computing the Cliff's Delta
statistic.&amp;lt;br> It is taken from project DABEST (see
&amp;lt;strong>License&amp;lt;/strong>). &amp;lt;strong>4. Research Questions
and Hypotheses in [3]&amp;lt;/strong> &amp;lt;strong>RQ1 Lexical
alignment&amp;lt;/strong>: How do the interlocutors &amp;lt;em>use&amp;lt;/em>
expressions related to the task? Is this associated with task success?
&amp;lt;strong>H1.1&amp;lt;/strong>: Task-specific referents become routine
early for more successful teams. &amp;lt;strong>H1.2&amp;lt;/strong>: Hesitation
phenomena are more likely to occur in the vicinity of priming and establishment
of task-specific referents for more successful teams. &amp;lt;strong>RQ2
Behavioural alignment&amp;lt;/strong>: How do the interlocutors
&amp;lt;em>follow up&amp;lt;/em> these expressions with actions? Is this
associated with task success? &amp;lt;strong>H2.1&amp;lt;/strong>: Instructions
are more likely to be followed by a corresponding action early in the dialogue
for more successful teams. &amp;lt;strong>H2.2&amp;lt;/strong>: When
instructions are followed by a corresponding or a different action, the action
is more likely to be in the vicinity of information management phenomena for
more successful teams. The RQs and Hs are addressed in the notebook for testing
the hypotheses (i.e. tools/7_test_the_hypotheses.ipynb).
&amp;lt;strong>Acknowledgements&amp;lt;/strong> This project has received
funding from the European Union's Horizon 2020 research and innovation programme
under grant agreement No 765955. Namely, the ANIMATAS Project.
&amp;lt;strong>License&amp;lt;/strong> The whole package is under MIT License,
see the &amp;lt;strong>LICENSE&amp;lt;/strong> file. Classes under the
&amp;lt;strong>tools/effsize&amp;lt;/strong> package were taken from project
&amp;lt;strong>DABEST&amp;lt;/strong>, Copyright 2016-2020 Joses W. Ho. These
classes are licensed under the BSD 3-Clause Clear License. See
&amp;lt;strong>tools/effsize/LICENSE&amp;lt;/strong> file for additional
details. Classes under the &amp;lt;strong>tools/dialign-1.0&amp;lt;/strong>
package were taken from project &amp;lt;strong>dialign&amp;lt;/strong>. These
classes are licensed under the CeCILL-B License. This package is used as an
"external module", see&amp;lt;strong>
tools/dialign-1.0/LICENSE.txt&amp;lt;/strong> for additional
details.</description>
<description>{"references": ["[1] J. Nasir, U. Norman, B. Bruno, and P. Dillenbourg,
\"You Tell, I Do, and We Swap until we Connect All the Gold Mines!,\" ERCIM
News, vol. 2020, no. 120, 2020, [Online]. Available:
https://ercim-news.ercim.eu/en120/special/you-tell-i-do-and-we-swap-until-we-connect-all-the-gold-mines",
"[2] J. Nasir*, U. Norman*, B. Bruno, and P. Dillenbourg, \"When Positive
Perception of the Robot Has No Effect on Learning,\" in 2020 29th IEEE
International Conference on Robot and Human Interactive Communication (RO-MAN),
Aug. 2020, pp. 313\u2013320, doi: 10.1109/RO-MAN47096.2020.9223343", "[3] U.
Norman*, T. Dinkar*, B. Bruno, and C. Clavel, \"Studying Alignment in a
Collaborative Learning Activity via Automatic Methods: The Link Between What We
Say and Do,\" Dialogue &amp;amp;amp; Discourse, vol. 13, no. 2, pp. 1\u201348,
Aug. 2022, doi: 10.5210/dad.2022.201.", "[4] M. Sangin, G. Molinari, M.-A.
N\u00fcssli, and P. Dillenbourg, \"Facilitating peer knowledge modeling: Effects
of a knowledge awareness tool on collaborative learning outcomes and
processes,\"\" Computers in Human Behavior, vol. 27, no. 3, pp. 1059\u20131067,
May 2011, doi: 10.1016/j.chb.2010.05.032."]}</description>
<subject classid="keyword" classname="keyword"
schemeid="dnet:subject_classification_typologies"
schemename="dnet:subject_classification_typologies" inferred="false"
provenanceaction="sysimport:crosswalk:repository" trust="0.9"
>alignment</subject>
<subject classid="keyword" classname="keyword"
schemeid="dnet:subject_classification_typologies"
schemename="dnet:subject_classification_typologies" inferred="false"
provenanceaction="sysimport:crosswalk:repository" trust="0.9">situated
dialogue</subject>
<subject classid="keyword" classname="keyword"
schemeid="dnet:subject_classification_typologies"
schemename="dnet:subject_classification_typologies" inferred="false"
provenanceaction="sysimport:crosswalk:repository" trust="0.9">collaborative
learning</subject>
<subject classid="keyword" classname="keyword"
schemeid="dnet:subject_classification_typologies"
schemename="dnet:subject_classification_typologies" inferred="false"
provenanceaction="sysimport:crosswalk:repository" trust="0.9">spontaneous
speech</subject>
<subject classid="keyword" classname="keyword"
schemeid="dnet:subject_classification_typologies"
schemename="dnet:subject_classification_typologies" inferred="false"
provenanceaction="sysimport:crosswalk:repository" trust="0.9"
>disfluency</subject>
<subject classid="keyword" classname="keyword"
schemeid="dnet:subject_classification_typologies"
schemename="dnet:subject_classification_typologies" inferred="false"
provenanceaction="sysimport:crosswalk:repository" trust="0.9">mutual
understanding</subject>
<language classid="eng" classname="English" schemeid="dnet:languages"
schemename="dnet:languages"/>
<relevantdate classid="issued" classname="issued" schemeid="dnet:dataCite_date"
schemename="dnet:dataCite_date" inferred="false"
provenanceaction="sysimport:crosswalk:repository" trust="0.9"
>2021-04-09</relevantdate>
<relevantdate classid="issued" classname="issued" schemeid="dnet:dataCite_date"
schemename="dnet:dataCite_date" inferred="false"
provenanceaction="sysimport:crosswalk:repository" trust="0.9"
>2022-08-08</relevantdate>
<publisher>Zenodo</publisher>
<resulttype classid="software" classname="software"
schemeid="dnet:result_typologies" schemename="dnet:result_typologies"/>
<resourcetype classid="UNKNOWN" classname="UNKNOWN"
schemeid="dnet:dataCite_resource" schemename="dnet:dataCite_resource"/>
<programmingLanguage/>
<context id="EC" label="European Commission" type="funding">
<category id="EC::H2020" label="Horizon 2020 Framework Programme">
<concept id="EC::H2020::MSCA-ITN-ETN" label="European Training Networks"/>
</category>
</context>
<eoscifguidelines code="EOSC::Jupyter Notebook"
label="EOSC::Jupyter Notebook"
url=""
semanticrelation="compliesWith"/>
<datainfo>
<inferred>true</inferred>
<deletedbyinference>false</deletedbyinference>
<trust>0.8</trust>
<inferenceprovenance>dedup-result-decisiontree-v3</inferenceprovenance>
<provenanceaction classid="sysimport:dedup" classname="Inferred by OpenAIRE"
schemeid="dnet:provenanceActions" schemename="dnet:provenanceActions"/>
</datainfo>
<rels>
<rel inferred="false" trust="0.9" inferenceprovenance=""
provenanceaction="sysimport:actionset">
<to class="IsSupplementTo" scheme="dnet:result_result_relations"
type="publication">doi_dedup___::ae235765bbc422195a6c9f632b2d77eb</to>
<collectedfrom name="arXiv.org e-Print Archive"
id="opendoar____::6f4922f45568161a8cdf4ad2299f6d23"/>
<pid classid="arXiv" classname="arXiv" schemeid="dnet:pid_types"
schemename="dnet:pid_types" inferred="false"
provenanceaction="sysimport:crosswalk:repository" trust="0.9"
>2104.04429</pid>
<collectedfrom name="Infoscience - EPFL scientific publications"
id="opendoar____::eecca5b6365d9607ee5a9d336962c534"/>
<publisher>arXiv</publisher>
<collectedfrom name="Crossref"
id="openaire____::081b82f96300b6a6e3d282bad31cb6e2"/>
<dateofacceptance>2022-08-05</dateofacceptance>
<title classid="main title" classname="main title"
schemeid="dnet:dataCite_title" schemename="dnet:dataCite_title">Studying
Alignment in a Collaborative Learning Activity via Automatic Methods:
The Link Between What We Say and Do</title>
<collectedfrom name="ORCID"
id="openaire____::806360c771262b4d6770e7cdf04b5c5a"/>
<collectedfrom name="Datacite"
id="openaire____::9e3be59865b2c1c335d32dae2fe7b254"/>
<pid classid="doi" classname="Digital Object Identifier"
schemeid="dnet:pid_types" schemename="dnet:pid_types" inferred="false"
provenanceaction="sysimport:actionset" trust="0.9"
>10.48550/arxiv.2104.04429</pid>
<pid classid="doi" classname="Digital Object Identifier"
schemeid="dnet:pid_types" schemename="dnet:pid_types"
>10.5210/dad.2022.201</pid>
</rel>
<rel inferred="false" trust="0.9" inferenceprovenance=""
provenanceaction="sysimport:actionset">
<to class="isProducedBy" scheme="dnet:result_project_relations"
type="project">corda__h2020::c4515ebef538a734cf11f795347f5dac</to>
<title>Advancing intuitive human-machine interaction with human-like social
capabilities for education in schools</title>
<code>765955</code>
<funding>
<funder id="ec__________::EC" shortname="EC" name="European Commission"
jurisdiction="EU"/>
<funding_level_0 name="H2020">ec__________::EC::H2020</funding_level_0>
<funding_level_1 name="MSCA-ITN-ETN"
>ec__________::EC::H2020::MSCA-ITN-ETN</funding_level_1>
</funding>
<acronym>ANIMATAS</acronym>
</rel>
<rel inferred="false" trust="0.9" inferenceprovenance=""
provenanceaction="sysimport:actionset">
<to class="IsSupplementedBy" scheme="dnet:result_result_relations"
type="dataset">doi_dedup___::0a6314b0ed275d915f5b57a259375691</to>
<dateofacceptance>2021-03-22</dateofacceptance>
<publisher>Zenodo</publisher>
<pid classid="doi" classname="Digital Object Identifier"
schemeid="dnet:pid_types" schemename="dnet:pid_types" inferred="false"
provenanceaction="sysimport:crosswalk:repository" trust="0.9"
>10.5281/zenodo.4627104</pid>
<title classid="main title" classname="main title"
schemeid="dnet:dataCite_title" schemename="dnet:dataCite_title"
inferred="false" provenanceaction="sysimport:crosswalk:repository"
trust="0.9">JUSThink Dialogue and Actions Corpus</title>
<pid classid="doi" classname="Digital Object Identifier"
schemeid="dnet:pid_types" schemename="dnet:pid_types" inferred="false"
provenanceaction="sysimport:actionset" trust="0.9"
>10.5281/zenodo.4627103</pid>
<collectedfrom name="ZENODO"
id="opendoar____::358aee4cc897452c00244351e4d91f69"/>
<collectedfrom name="Datacite"
id="openaire____::9e3be59865b2c1c335d32dae2fe7b254"/>
</rel>
</rels>
<children>
<result objidentifier="doi_________::c054151b6a8c4f41c7acf160651a6503">
<publisher>Zenodo</publisher>
<pid classid="doi" classname="Digital Object Identifier"
schemeid="dnet:pid_types" schemename="dnet:pid_types" inferred="false"
provenanceaction="sysimport:crosswalk:repository" trust="0.9"
>10.5281/zenodo.4675070</pid>
<title classid="main title" classname="main title"
schemeid="dnet:dataCite_title" schemename="dnet:dataCite_title"
inferred="false" provenanceaction="sysimport:crosswalk:repository"
trust="0.9">JUSThink Alignment Analysis</title>
<dateofacceptance>2021-04-09</dateofacceptance>
<collectedfrom name="ZENODO"
id="opendoar____::358aee4cc897452c00244351e4d91f69"/>
</result>
<result objidentifier="doi_________::04aaa160a921cafdc90e03483de0a26f">
<dateofacceptance>2022-08-08</dateofacceptance>
<publisher>Zenodo</publisher>
<pid classid="doi" classname="Digital Object Identifier"
schemeid="dnet:pid_types" schemename="dnet:pid_types" inferred="false"
provenanceaction="sysimport:crosswalk:repository" trust="0.9"
>10.5281/zenodo.6974562</pid>
<collectedfrom name="ZENODO"
id="opendoar____::358aee4cc897452c00244351e4d91f69"/>
<title classid="main title" classname="main title"
schemeid="dnet:dataCite_title" schemename="dnet:dataCite_title"
inferred="false" provenanceaction="sysimport:crosswalk:repository"
trust="0.9">JUSThink Alignment Analysis (v1.1.0)</title>
</result>
<result objidentifier="doi_________::684a8fbe0ff09f288e9d29db897233bb">
<title classid="main title" classname="main title"
schemeid="dnet:dataCite_title" schemename="dnet:dataCite_title">JUSThink
Alignment Analysis (v1.1.0)</title>
<dateofacceptance>2022-08-08</dateofacceptance>
<publisher>Zenodo</publisher>
<pid classid="doi" classname="Digital Object Identifier"
schemeid="dnet:pid_types" schemename="dnet:pid_types" inferred="false"
provenanceaction="sysimport:actionset" trust="0.9"
>10.5281/zenodo.4675069</pid>
<collectedfrom name="Datacite"
id="openaire____::9e3be59865b2c1c335d32dae2fe7b254"/>
</result>
<instance>
<accessright classid="OPEN" classname="Open Access"
schemeid="dnet:access_modes" schemename="dnet:access_modes"/>
<collectedfrom name="Datacite"
id="openaire____::9e3be59865b2c1c335d32dae2fe7b254"/>
<hostedby name="ZENODO" id="opendoar____::358aee4cc897452c00244351e4d91f69"/>
<dateofacceptance>2022-08-08</dateofacceptance>
<instancetype classid="0029" classname="Software"
schemeid="dnet:publication_resource"
schemename="dnet:publication_resource"/>
<pid classid="doi" classname="Digital Object Identifier"
schemeid="dnet:pid_types" schemename="dnet:pid_types" inferred="false"
provenanceaction="sysimport:actionset" trust="0.9"
>10.5281/zenodo.4675069</pid>
<refereed classid="0000" classname="UNKNOWN" schemeid="dnet:review_levels"
schemename="dnet:review_levels"/>
<license>https://opensource.org/licenses/MIT</license>
<webresource>
<url>https://doi.org/10.5281/zenodo.4675069</url>
</webresource>
</instance>
<instance>
<accessright classid="OPEN" classname="Open Access"
schemeid="dnet:access_modes" schemename="dnet:access_modes"/>
<collectedfrom name="ZENODO"
id="opendoar____::358aee4cc897452c00244351e4d91f69"/>
<hostedby name="ZENODO" id="opendoar____::358aee4cc897452c00244351e4d91f69"/>
<dateofacceptance>2022-08-08</dateofacceptance>
<instancetype classid="0029" classname="Software"
schemeid="dnet:publication_resource"
schemename="dnet:publication_resource"/>
<pid classid="doi" classname="Digital Object Identifier"
schemeid="dnet:pid_types" schemename="dnet:pid_types" inferred="false"
provenanceaction="sysimport:crosswalk:repository" trust="0.9"
>10.5281/zenodo.6974562</pid>
<refereed classid="0000" classname="UNKNOWN" schemeid="dnet:review_levels"
schemename="dnet:review_levels"/>
<license>https://opensource.org/licenses/MIT</license>
<webresource>
<url>https://doi.org/10.5281/zenodo.6974562</url>
</webresource>
</instance>
<instance>
<accessright classid="OPEN" classname="Open Access"
schemeid="dnet:access_modes" schemename="dnet:access_modes"/>
<collectedfrom name="ZENODO"
id="opendoar____::358aee4cc897452c00244351e4d91f69"/>
<hostedby name="ZENODO" id="opendoar____::358aee4cc897452c00244351e4d91f69"/>
<dateofacceptance>2021-04-09</dateofacceptance>
<instancetype classid="0029" classname="Software"
schemeid="dnet:publication_resource"
schemename="dnet:publication_resource"/>
<pid classid="doi" classname="Digital Object Identifier"
schemeid="dnet:pid_types" schemename="dnet:pid_types" inferred="false"
provenanceaction="sysimport:crosswalk:repository" trust="0.9"
>10.5281/zenodo.4675070</pid>
<refereed classid="0000" classname="UNKNOWN" schemeid="dnet:review_levels"
schemename="dnet:review_levels"/>
<license>https://opensource.org/licenses/MIT</license>
<webresource>
<url>https://doi.org/10.5281/zenodo.4675070</url>
</webresource>
</instance>
</children>
</oaf:result>
</oaf:entity>
</metadata>
</result>
</record>

View File

@ -2,11 +2,11 @@
<FIELDS> <FIELDS>
<FIELD indexable="false" name="oafentity" result="true" stat="false" tokenizable="false" xpath="//*[local-name() = 'entity']"/> <FIELD indexable="false" name="oafentity" result="true" stat="false" tokenizable="false" xpath="//*[local-name() = 'entity']"/>
<FIELD indexable="true" name="oaftype" result="false" stat="false" tokenizable="false" value="local-name(//*[local-name()='entity']/*[local-name() != 'extraInfo'])"/> <FIELD indexable="true" name="oaftype" result="false" stat="false" tokenizable="false" value="local-name(//*[local-name()='entity']/*[local-name() != 'extraInfo'])"/>
<FIELD indexable="true" name="objIdentifier" result="false" stat="false" tokenizable="false" xpath="//header/dri:objIdentifier"/><!-- DATASOURCE FIELDS --> <FIELD indexable="true" name="objIdentifier" result="false" stat="false" tokenizable="false" xpath="//header/dri:objIdentifier"/><!-- DATASOURCE FIELDS -->
<FIELD indexable="true" name="datasourceofficialname" result="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/officialname"/> <FIELD copy="true" indexable="true" name="datasourceofficialname" result="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/officialname"/>
<FIELD indexable="true" name="datasourceenglishname" result="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/englishname"/> <FIELD copy="true" indexable="true" name="datasourceenglishname" result="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/englishname"/>
<FIELD indexable="true" name="datasourceoddescription" result="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/oddescription"/> <FIELD copy="true" indexable="true" name="datasourceoddescription" result="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/oddescription"/>
<FIELD indexable="true" name="datasourceodsubjects" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/odsubjects"/> <FIELD copy="true" indexable="true" name="datasourceodsubjects" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/odsubjects"/>
<FIELD indexable="true" name="datasourceodlanguages" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/odlanguages"/> <FIELD indexable="true" name="datasourceodlanguages" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/odlanguages"/>
<FIELD indexable="true" name="datasourceodcontenttypes" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/odcontenttypes"/> <FIELD indexable="true" name="datasourceodcontenttypes" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/odcontenttypes"/>
<FIELD indexable="true" multivalued="false" name="datasourcetypename" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/datasourcetype/@classname"/> <FIELD indexable="true" multivalued="false" name="datasourcetypename" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/datasourcetype/@classname"/>
@ -14,17 +14,16 @@
<FIELD indexable="true" multivalued="false" name="datasourcetypeuiname" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/datasourcetypeui/@classname"/> <FIELD indexable="true" multivalued="false" name="datasourcetypeuiname" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/datasourcetypeui/@classname"/>
<FIELD indexable="true" multivalued="false" name="datasourcecompatibilityid" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/openairecompatibility/@classid"/> <FIELD indexable="true" multivalued="false" name="datasourcecompatibilityid" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/openairecompatibility/@classid"/>
<FIELD indexable="true" multivalued="false" name="datasourcecompatibilityname" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/openairecompatibility/@classname"/> <FIELD indexable="true" multivalued="false" name="datasourcecompatibilityname" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/openairecompatibility/@classname"/>
<FIELD indexable="true" multivalued="true" name="datasourcesubject" result="false" stat="false" type="ngramtext" xpath="//*[local-name()='entity']/*[local-name()='datasource']/subjects"/> <FIELD copy="true" indexable="true" multivalued="true" name="datasourcesubject" result="false" stat="false" type="ngramtext" xpath="//*[local-name()='entity']/*[local-name()='datasource']/subjects"/>
<FIELD indexable="true" name="versioning" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/versioning"/> <FIELD indexable="true" name="versioning" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/versioning"/><!-- datasource fields for EOSC -->
<!-- datasource fields for EOSC --> <FIELD indexable="true" name="datasourcejurisdiction" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/jurisdiction/@classname"/>
<FIELD indexable="true" name="datasourcejurisdiction" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/jurisdiction"/>
<FIELD indexable="true" name="datasourcethematic" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/thematic"/> <FIELD indexable="true" name="datasourcethematic" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/thematic"/>
<FIELD indexable="true" name="datasourceknowledge_graph" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/knowledgegraph"/> <FIELD indexable="true" name="datasourcecontentpolicy" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/contentpolicy/@classname"/>
<FIELD indexable="true" name="datasourcecontentpolicy" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/contentpolicy"/> <FIELD indexable="true" name="eosctype" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/eosctype/@classname"/>
<!-- ORGANIZATION FIELDS --> <FIELD indexable="true" name="eoscdatasourcetype" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/eoscdatasourcetype/@classname"/><!-- ORGANIZATION FIELDS --><!-- ORGANIZATION FIELDS --><!-- ORGANIZATION FIELDS -->
<FIELD indexable="true" name="organizationlegalshortname" result="false" stat="false" type="ngramtext" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='organization']//legalshortname)"/> <FIELD copy="true" indexable="true" name="organizationlegalshortname" result="false" stat="false" type="ngramtext" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='organization']//legalshortname)"/>
<FIELD indexable="true" name="organizationlegalname" result="false" stat="false" type="ngramtext" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='organization']//legalname)"/> <FIELD copy="true" indexable="true" name="organizationlegalname" result="false" stat="false" type="ngramtext" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='organization']//legalname)"/>
<FIELD indexable="true" name="organizationalternativenames" result="false" stat="false" type="ngramtext" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='organization']//alternativeNames)"/> <FIELD copy="true" indexable="true" name="organizationalternativenames" result="false" stat="false" type="ngramtext" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='organization']//alternativeNames)"/>
<FIELD indexable="true" name="organizationeclegalbody" result="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='organization']/eclegalbody"/> <FIELD indexable="true" name="organizationeclegalbody" result="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='organization']/eclegalbody"/>
<FIELD indexable="true" name="organizationeclegalperson" result="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='organization']/eclegalperson"/> <FIELD indexable="true" name="organizationeclegalperson" result="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='organization']/eclegalperson"/>
<FIELD indexable="true" name="organizationecnonprofit" result="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='organization']/ecnonprofit"/> <FIELD indexable="true" name="organizationecnonprofit" result="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='organization']/ecnonprofit"/>
@ -34,18 +33,17 @@
<FIELD indexable="true" name="organizationecenterprise" result="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='organization']/ecenterprise"/> <FIELD indexable="true" name="organizationecenterprise" result="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='organization']/ecenterprise"/>
<FIELD indexable="true" name="organizationecsmevalidated" result="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='organization']/ecsmevalidated"/> <FIELD indexable="true" name="organizationecsmevalidated" result="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='organization']/ecsmevalidated"/>
<FIELD indexable="true" name="organizationecnutscode" result="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='organization']/ecnutscode"/> <FIELD indexable="true" name="organizationecnutscode" result="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='organization']/ecnutscode"/>
<FIELD indexable="true" multivalued="false" name="organizationcountryname" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='organization']/country/@classname"/> <FIELD indexable="true" multivalued="false" name="organizationcountryname" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='organization']/country/@classname"/><!-- PROJECT FIELDS -->
<!-- PROJECT FIELDS --> <FIELD copy="true" indexable="true" name="projectcode" result="false" stat="false" type="ngramtext" xpath="//*[local-name()='entity']/*[local-name()='project']/code"/>
<FIELD indexable="true" name="projectcode" result="false" stat="false" type="ngramtext" xpath="//*[local-name()='entity']/*[local-name()='project']/code"/>
<FIELD indexable="true" name="projectcode_nt" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='project']/code"/> <FIELD indexable="true" name="projectcode_nt" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='project']/code"/>
<FIELD indexable="true" name="projectacronym" result="false" stat="false" type="ngramtext" xpath="//*[local-name()='entity']/*[local-name()='project']/acronym"/> <FIELD copy="true" indexable="true" name="projectacronym" result="false" stat="false" type="ngramtext" xpath="//*[local-name()='entity']/*[local-name()='project']/acronym"/>
<FIELD indexable="true" name="projecttitle" result="false" stat="false" type="ngramtext" xpath="//*[local-name()='entity']/*[local-name()='project']/title"/> <FIELD copy="true" indexable="true" name="projecttitle" result="false" stat="false" type="ngramtext" xpath="//*[local-name()='entity']/*[local-name()='project']/title"/>
<FIELD indexable="true" multivalued="false" name="projectstartdate" result="false" stat="false" type="pdate" value="//*[local-name()='entity']/*[local-name()='project']/startdate"/> <FIELD indexable="true" multivalued="false" name="projectstartdate" result="false" stat="false" type="date" value="//*[local-name()='entity']/*[local-name()='project']/startdate"/>
<FIELD indexable="true" multivalued="false" name="projectstartyear" result="false" stat="false" tokenizable="false" value="dnet:extractYear(//*[local-name()='entity']/*[local-name()='project']/startdate)"/> <FIELD indexable="true" multivalued="false" name="projectstartyear" result="false" stat="false" tokenizable="false" value="dnet:extractYear(//*[local-name()='entity']/*[local-name()='project']/startdate)"/>
<FIELD indexable="true" multivalued="false" name="projectenddate" result="false" stat="false" type="pdate" value="//*[local-name()='entity']/*[local-name()='project']/enddate"/> <FIELD indexable="true" multivalued="false" name="projectenddate" result="false" stat="false" type="date" value="//*[local-name()='entity']/*[local-name()='project']/enddate"/>
<FIELD indexable="true" multivalued="false" name="projectendyear" result="false" stat="false" tokenizable="false" value="dnet:extractYear(//*[local-name()='entity']/*[local-name()='project']/enddate)"/> <FIELD indexable="true" multivalued="false" name="projectendyear" result="false" stat="false" tokenizable="false" value="dnet:extractYear(//*[local-name()='entity']/*[local-name()='project']/enddate)"/>
<FIELD indexable="true" multivalued="false" name="projectcallidentifier" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='project']/callidentifier"/> <FIELD indexable="true" multivalued="false" name="projectcallidentifier" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='project']/callidentifier"/>
<FIELD indexable="true" name="projectkeywords" result="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='project']/keywords"/> <FIELD copy="true" indexable="true" name="projectkeywords" result="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='project']/keywords"/>
<FIELD indexable="true" multivalued="false" name="projectduration" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='project']/duration"/> <FIELD indexable="true" multivalued="false" name="projectduration" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='project']/duration"/>
<FIELD indexable="true" multivalued="false" name="projectecsc39" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='project']/ecsc39)"/> <FIELD indexable="true" multivalued="false" name="projectecsc39" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='project']/ecsc39)"/>
<FIELD indexable="true" multivalued="false" name="projectoamandatepublications" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='project']/oamandatepublications"/> <FIELD indexable="true" multivalued="false" name="projectoamandatepublications" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='project']/oamandatepublications"/>
@ -54,35 +52,36 @@
<FIELD indexable="true" multivalued="false" name="projectcontracttypename" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='project']/contracttype/@classname"/> <FIELD indexable="true" multivalued="false" name="projectcontracttypename" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='project']/contracttype/@classname"/>
<FIELD indexable="true" name="fundinglevel0_id" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='project']/fundingtree//funding_level_0/id"/> <FIELD indexable="true" name="fundinglevel0_id" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='project']/fundingtree//funding_level_0/id"/>
<FIELD indexable="true" name="fundinglevel0_name" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='project']/fundingtree//funding_level_0/name"/> <FIELD indexable="true" name="fundinglevel0_name" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='project']/fundingtree//funding_level_0/name"/>
<FIELD indexable="true" name="fundinglevel0_description" result="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='project']/fundingtree//funding_level_0/description"/> <FIELD copy="true" indexable="true" name="fundinglevel0_description" result="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='project']/fundingtree//funding_level_0/description"/>
<FIELD indexable="true" name="fundinglevel1_id" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='project']/fundingtree//funding_level_1/id"/> <FIELD indexable="true" name="fundinglevel1_id" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='project']/fundingtree//funding_level_1/id"/>
<FIELD indexable="true" name="fundinglevel1_name" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='project']/fundingtree//funding_level_1/name"/> <FIELD indexable="true" name="fundinglevel1_name" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='project']/fundingtree//funding_level_1/name"/>
<FIELD indexable="true" name="fundinglevel1_description" result="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='project']/fundingtree//funding_level_1/description"/> <FIELD copy="true" indexable="true" name="fundinglevel1_description" result="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='project']/fundingtree//funding_level_1/description"/>
<FIELD indexable="true" name="fundinglevel2_id" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='project']/fundingtree//funding_level_2/id"/> <FIELD indexable="true" name="fundinglevel2_id" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='project']/fundingtree//funding_level_2/id"/>
<FIELD indexable="true" name="fundinglevel2_name" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='project']/fundingtree//funding_level_2/name"/> <FIELD indexable="true" name="fundinglevel2_name" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='project']/fundingtree//funding_level_2/name"/>
<FIELD indexable="true" name="fundinglevel2_description" result="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='project']/fundingtree//funding_level_2/description"/><!-- PROJECTS' FUNDER FIELDS: indexable only with the new funding path/context handling --> <FIELD copy="true" indexable="true" name="fundinglevel2_description" result="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='project']/fundingtree//funding_level_2/description"/><!-- PROJECTS' FUNDER FIELDS: indexable only with the new funding path/context handling -->
<FIELD indexable="true" name="funder" result="false" stat="false" tokenizable="false" value="concat(./id/text(), '||', ./name/text(), '||', ./shortname/text())" xpath="//*[local-name()='entity']/*[local-name()='project']/fundingtree/funder"/> <FIELD indexable="true" name="funder" result="false" stat="false" tokenizable="false" value="concat(./id/text(), '||', ./name/text(), '||', ./shortname/text())" xpath="//*[local-name()='entity']/*[local-name()='project']/fundingtree/funder"/>
<FIELD indexable="true" name="fundershortname" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='project']/fundingtree/funder/shortname"/> <FIELD indexable="true" name="fundershortname" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='project']/fundingtree/funder/shortname"/>
<FIELD indexable="true" name="funderid" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='project']/fundingtree/funder/id"/> <FIELD indexable="true" name="funderid" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='project']/fundingtree/funder/id"/>
<FIELD indexable="true" name="fundername" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='project']/fundingtree/funder/name"/> <FIELD indexable="true" name="fundername" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='project']/fundingtree/funder/name"/>
<FIELD indexable="true" name="funderoriginalname" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='project']/fundingtree/funder/originalname"/> <FIELD indexable="true" name="funderoriginalname" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='project']/fundingtree/funder/originalname"/>
<FIELD indexable="true" name="funderjurisdiction" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='project']/fundingtree/funder/jurisdiction"/><!-- RESULT FIELDS --> <FIELD indexable="true" name="funderjurisdiction" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='project']/fundingtree/funder/jurisdiction"/><!-- RESULT FIELDS -->
<FIELD indexable="true" name="resulttitle" result="false" stat="false" xpath="//*[local-name() = 'entity']/*[local-name() ='result']/title | //*[local-name()='entity']/*[local-name()='result']/children/result/title"/> <FIELD copy="true" indexable="true" name="resulttitle" result="false" stat="false" type="text_en" xpath="//*[local-name() = 'entity']/*[local-name() ='result']/title | //*[local-name()='entity']/*[local-name()='result']/children/result/title"/>
<FIELD indexable="true" name="resultsubject" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='result']/subject)"/> <FIELD indexable="true" name="resultsubject" result="false" stat="false" type="text_en" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='result']/subject)"/>
<FIELD indexable="true" name="resultsubjectclass" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='result']/subject/@classname)"/> <FIELD indexable="true" name="resultsubjectclass" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='result']/subject/@classname)"/>
<FIELD indexable="true" multivalued="false" name="resultembargoenddate" result="false" stat="false" type="pdate" value="//*[local-name()='entity']/*[local-name()='result']/embargoenddate"/> <FIELD indexable="true" multivalued="false" name="resultembargoenddate" result="false" stat="false" type="date" value="//*[local-name()='entity']/*[local-name()='result']/embargoenddate"/>
<FIELD indexable="true" multivalued="false" name="resultembargoendyear" result="false" stat="false" tokenizable="false" value="dnet:extractYear(//*[local-name()='entity']/*[local-name()='result']/embargoenddate)"/> <FIELD indexable="true" multivalued="false" name="resultembargoendyear" result="false" stat="false" tokenizable="false" value="dnet:extractYear(//*[local-name()='entity']/*[local-name()='result']/embargoenddate)"/>
<FIELD indexable="true" multivalued="false" name="resulttypeid" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='result']/resulttype/@classid"/> <FIELD indexable="true" multivalued="false" name="resulttypeid" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='result']/resulttype/@classid"/>
<FIELD indexable="true" multivalued="false" name="resulttypename" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='result']/resulttype/@classname"/> <FIELD indexable="true" multivalued="false" name="resulttypename" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='result']/resulttype/@classname"/>
<FIELD indexable="true" multivalued="false" name="resultlanguagename" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='result']/language/@classname"/> <FIELD indexable="true" multivalued="false" name="resultlanguagename" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='result']/language/@classname"/>
<FIELD indexable="true" name="resultpublisher" result="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='result']/*[local-name()='publisher']"/> <FIELD copy="true" indexable="true" name="resultpublisher" result="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='result']/*[local-name()='publisher']"/>
<FIELD indexable="true" name="resultdescription" result="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='result']//*[local-name()='description']"/> <FIELD copy="true" indexable="true" name="resultdescription" result="false" stat="false" type="text_en" xpath="//*[local-name()='entity']/*[local-name()='result']//*[local-name()='description']"/>
<FIELD indexable="true" name="resultlicense" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='result']/children/instance/license"/> <FIELD indexable="true" name="resultlicense" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='result']/children/instance/license"/>
<FIELD indexable="true" name="resultaccessright" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='result']/children/instance/accessright/@classname"/> <FIELD indexable="true" name="resultaccessright" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='result']/children/instance/accessright/@classname"/>
<FIELD indexable="true" name="resultresourcetypename" result="false" stat="false" type="string_ci" xpath="//*[local-name()='entity']/*[local-name()='result']/resourcetype/@classname"/>
<FIELD indexable="true" multivalued="false" name="resultbestaccessright" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='result']/bestaccessright/@classname)"/> <FIELD indexable="true" multivalued="false" name="resultbestaccessright" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='result']/bestaccessright/@classname)"/>
<FIELD indexable="true" multivalued="false" name="resultdateofacceptance" result="false" stat="false" type="pdate" value="//*[local-name()='entity']/*[local-name()='result']/dateofacceptance"/> <FIELD indexable="true" multivalued="false" name="resultdateofacceptance" result="false" stat="false" type="date" value="//*[local-name()='entity']/*[local-name()='result']/dateofacceptance"/>
<FIELD indexable="true" multivalued="false" name="resultacceptanceyear" result="false" stat="false" tokenizable="false" value="dnet:extractYear(//*[local-name()='entity']/*[local-name()='result']/dateofacceptance)"/> <FIELD copy="true" indexable="true" multivalued="false" name="resultacceptanceyear" result="false" stat="false" tokenizable="false" value="dnet:extractYear(//*[local-name()='entity']/*[local-name()='result']/dateofacceptance)"/>
<FIELD indexable="true" multivalued="true" name="resultauthor" result="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='result']/creator"/> <FIELD copy="true" indexable="true" multivalued="true" name="resultauthor" result="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='result']/creator"/>
<FIELD indexable="true" multivalued="true" name="resultauthor_nt" result="false" stat="false" type="string_ci" xpath="//*[local-name()='entity']/*[local-name()='result']/creator"/> <FIELD indexable="true" multivalued="true" name="resultauthor_nt" result="false" stat="false" type="string_ci" xpath="//*[local-name()='entity']/*[local-name()='result']/creator"/>
<FIELD indexable="true" multivalued="true" name="authorid" result="false" stat="false" type="string_ci" xpath="//*[local-name()='entity']/*[local-name()='result']/creator/@*[local-name() != 'rank' and local-name() != 'name' and local-name() != 'surname']"/> <FIELD indexable="true" multivalued="true" name="authorid" result="false" stat="false" type="string_ci" xpath="//*[local-name()='entity']/*[local-name()='result']/creator/@*[local-name() != 'rank' and local-name() != 'name' and local-name() != 'surname']"/>
<FIELD indexable="true" multivalued="true" name="authoridtype" result="false" stat="false" type="string_ci" xpath="//*[local-name()='entity']/*[local-name()='result']/creator/@*[local-name() != 'rank' and local-name() != 'name' and local-name() != 'surname']/local-name()"/> <FIELD indexable="true" multivalued="true" name="authoridtype" result="false" stat="false" type="string_ci" xpath="//*[local-name()='entity']/*[local-name()='result']/creator/@*[local-name() != 'rank' and local-name() != 'name' and local-name() != 'surname']/local-name()"/>
@ -94,26 +93,29 @@
<FIELD indexable="true" name="resultdupid" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*//children/result/@objidentifier"/> <FIELD indexable="true" name="resultdupid" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*//children/result/@objidentifier"/>
<FIELD indexable="true" name="organizationdupid" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*//children/organization/@objidentifier"/> <FIELD indexable="true" name="organizationdupid" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*//children/organization/@objidentifier"/>
<FIELD indexable="true" name="externalrefsite" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//children/externalreference/sitename)"/> <FIELD indexable="true" name="externalrefsite" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//children/externalreference/sitename)"/>
<FIELD indexable="true" name="externalreflabel" result="false" stat="false" tokenizable="true" xpath="distinct-values(//*[local-name()='entity']/*//children/externalreference/label)"/> <FIELD copy="true" indexable="true" name="externalreflabel" result="false" stat="false" tokenizable="true" xpath="distinct-values(//*[local-name()='entity']/*//children/externalreference/label)"/>
<FIELD indexable="true" name="externalrefclass" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//children/externalreference/qualifier/@classid)"/> <FIELD indexable="true" name="externalrefclass" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//children/externalreference/qualifier/@classid)"/>
<FIELD indexable="true" name="externalrefid" result="false" stat="false" tokenizable="false" xpath="(//*[local-name()='entity']/*//children/externalreference/refidentifier)"/> <FIELD indexable="true" name="externalrefid" result="false" stat="false" tokenizable="false" xpath="(//*[local-name()='entity']/*//children/externalreference/refidentifier)"/>
<FIELD indexable="true" name="resultidentifier" result="false" stat="false" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='result']/children/instance/webresource/*[local-name()='url'])"/> <FIELD copy="true" indexable="true" name="resultidentifier" result="false" stat="false" type="string_ci" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='result']/children/instance/webresource/*[local-name()='url'])"/>
<FIELD indexable="true" name="resultsource" result="false" stat="false" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='result']/source)"/><!-- REL FIELDS --> <FIELD copy="true" indexable="true" name="resultsource" result="false" stat="false" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='result']/source)"/>
<FIELD indexable="true" name="eoscifguidelines" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name() = 'result']/eoscifguidelines/@code)"/><!-- FOS and SDG fields are non-tokenizable so they can be used for faceted search -->
<FIELD indexable="true" name="fos" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='result']/subject[@classid='FOS'])"/>
<FIELD indexable="true" name="sdg" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='result']/subject[@classid='SDG'])"/><!-- REL FIELDS -->
<FIELD indexable="true" name="reldatasourcecompatibilityid" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//rel[./to/@type='datasource']/openairecompatibility/@classid)"/> <FIELD indexable="true" name="reldatasourcecompatibilityid" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//rel[./to/@type='datasource']/openairecompatibility/@classid)"/>
<FIELD indexable="true" name="relproject" result="false" stat="false" tokenizable="false" value="distinct-values(concat(./text(), '||', dnet:pickFirst(../acronym/text(), ../title/text())))" xpath="//*[local-name()='entity']/*//rel/to[@type='project']"/> <FIELD indexable="true" name="relproject" result="false" stat="false" tokenizable="false" value="distinct-values(concat(./text(), '||', dnet:pickFirst(../acronym/text(), ../title/text())))" xpath="//*[local-name()='entity']/*//rel/to[@type='project']"/>
<FIELD indexable="true" name="relprojectid" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//rel/to[@type='project'])"/> <FIELD indexable="true" name="relprojectid" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//rel/to[@type='project'])"/>
<FIELD indexable="true" name="relprojectcode" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//rel[./to/@type='project']/code)"/> <FIELD indexable="true" name="relprojectcode" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//rel[./to/@type='project']/code)"/>
<FIELD indexable="true" name="relprojectname" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//rel[./to/@type='project']/acronym)"/> <FIELD copy="true" indexable="true" name="relprojectname" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//rel[./to/@type='project']/acronym)"/>
<FIELD indexable="true" name="relprojecttitle" result="false" stat="false" xpath="distinct-values(//*[local-name()='entity']/*//rel[./to/@type='project']/title)"/> <FIELD copy="true" indexable="true" name="relprojecttitle" result="false" stat="false" xpath="distinct-values(//*[local-name()='entity']/*//rel[./to/@type='project']/title)"/>
<FIELD indexable="true" name="relcontracttypeid" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//rel[./to/@type='project']/contracttype/@classid)"/> <FIELD indexable="true" name="relcontracttypeid" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//rel[./to/@type='project']/contracttype/@classid)"/>
<FIELD indexable="true" name="relcontracttypename" result="false" stat="false" xpath="distinct-values(//*[local-name()='entity']/*//rel[./to/@type='project']/contracttype/@classname)"/> <FIELD copy="true" indexable="true" name="relcontracttypename" result="false" stat="false" xpath="distinct-values(//*[local-name()='entity']/*//rel[./to/@type='project']/contracttype/@classname)"/>
<FIELD indexable="true" name="relorganizationcountryid" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//rel[./to/@type='organization']/country/@classid)"/> <FIELD indexable="true" name="relorganizationcountryid" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//rel[./to/@type='organization']/country/@classid)"/>
<FIELD indexable="true" name="relorganizationcountryname" result="false" stat="false" xpath="distinct-values(//*[local-name()='entity']/*//rel[./to/@type='organization']/country/@classname)"/> <FIELD copy="true" indexable="true" name="relorganizationcountryname" result="false" stat="false" xpath="distinct-values(//*[local-name()='entity']/*//rel[./to/@type='organization']/country/@classname)"/>
<FIELD indexable="true" name="relorganizationid" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//rel/to[@type='organization'])"/> <FIELD indexable="true" name="relorganizationid" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//rel/to[@type='organization'])"/>
<FIELD indexable="true" name="relorganizationname" result="false" stat="false" xpath="distinct-values(//*[local-name()='entity']/*//rel[./to/@type='organization']/legalname)"/> <FIELD copy="true" indexable="true" name="relorganizationname" result="false" stat="false" xpath="distinct-values(//*[local-name()='entity']/*//rel[./to/@type='organization']/legalname)"/>
<FIELD indexable="true" name="relorganizationshortname" result="false" stat="false" xpath="distinct-values(//*[local-name()='entity']/*//rel[./to/@type='organization']/legalshortname)"/> <FIELD copy="true" indexable="true" name="relorganizationshortname" result="false" stat="false" xpath="distinct-values(//*[local-name()='entity']/*//rel[./to/@type='organization']/legalshortname)"/>
<FIELD indexable="true" name="relresultid" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//rel/to[@type='result'])"/> <FIELD indexable="true" name="relresultid" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//rel/to[@type='result'])"/>
<FIELD indexable="true" name="relresulttype" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//rel/resulttype/@classid)"/> <FIELD indexable="true" name="relresulttype" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//rel/to/@type)"/>
<FIELD indexable="true" name="relclass" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//rel/to/@class)"/> <FIELD indexable="true" name="relclass" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//rel/to/@class)"/>
<FIELD indexable="true" name="relfundinglevel0_id" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']//rel/funding/funding_level_0"/> <FIELD indexable="true" name="relfundinglevel0_id" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']//rel/funding/funding_level_0"/>
<FIELD indexable="true" name="relfundinglevel0_name" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']//rel/funding/funding_level_0/@name/string()"/> <FIELD indexable="true" name="relfundinglevel0_name" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']//rel/funding/funding_level_0/@name/string()"/>
@ -132,13 +134,15 @@
<FIELD indexable="true" name="relfunderjurisdiction" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']//rel/funding/funder/@jurisdiction)"/><!-- Collected-from (id and name) of the related entity. Available for result-result relationships --> <FIELD indexable="true" name="relfunderjurisdiction" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']//rel/funding/funder/@jurisdiction)"/><!-- Collected-from (id and name) of the related entity. Available for result-result relationships -->
<FIELD indexable="true" name="relcollectedfromid" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//rel/collectedfrom/@id)"/> <FIELD indexable="true" name="relcollectedfromid" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//rel/collectedfrom/@id)"/>
<FIELD indexable="true" name="relcollectedfromname" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//rel/collectedfrom/@name)"/> <FIELD indexable="true" name="relcollectedfromname" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//rel/collectedfrom/@name)"/>
<FIELD indexable="true" name="relvalidated" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//rel[./validated]/to[@type='project'])"/>
<FIELD indexable="true" name="semrelid" result="false" stat="false" tokenizable="false" value="concat(./to/text(), '||', ./to/@class/string())" xpath="//*[local-name()='entity']//rel"/><!-- COMMON FIELDS --> <FIELD indexable="true" name="semrelid" result="false" stat="false" tokenizable="false" value="concat(./to/text(), '||', ./to/@class/string())" xpath="//*[local-name()='entity']//rel"/><!-- COMMON FIELDS -->
<FIELD indexable="true" multivalued="false" name="dateofcollection" result="false" stat="false" type="pdate" value="//header/*[local-name()='dateOfCollection']"/> <FIELD indexable="true" multivalued="false" name="dateofcollection" result="false" stat="false" type="date" value="//header/*[local-name()='dateOfCollection']"/>
<FIELD indexable="true" name="status" result="false" stat="false" tokenizable="false" type="string_ci" xpath="//header/*[local-name()='status']"/>
<FIELD indexable="true" name="collectedfrom" result="false" stat="false" tokenizable="false" value="distinct-values(concat(./@id, '||', ./@name))" xpath="//*[local-name()='entity']/*/*[local-name()='collectedfrom'] | //*[local-name()='entity']/*//*[local-name() = 'instance']/*[local-name()='collectedfrom']"/> <FIELD indexable="true" name="collectedfrom" result="false" stat="false" tokenizable="false" value="distinct-values(concat(./@id, '||', ./@name))" xpath="//*[local-name()='entity']/*/*[local-name()='collectedfrom'] | //*[local-name()='entity']/*//*[local-name() = 'instance']/*[local-name()='collectedfrom']"/>
<FIELD indexable="true" name="collectedfromdatasourceid" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*/*[local-name()='collectedfrom']/@id | //*[local-name()='entity']/*//*[local-name() = 'instance']/*[local-name()='collectedfrom']/@id)"/> <FIELD indexable="true" name="collectedfromdatasourceid" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*/*[local-name()='collectedfrom']/@id | //*[local-name()='entity']/*//*[local-name() = 'instance']/*[local-name()='collectedfrom']/@id)"/>
<FIELD indexable="true" name="collectedfromname" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*/*[local-name()='collectedfrom']/@name | //*[local-name()='entity']/*//*[local-name() = 'instance']/*[local-name()='collectedfrom']/@name)"/> <FIELD indexable="true" name="collectedfromname" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*/*[local-name()='collectedfrom']/@name | //*[local-name()='entity']/*//*[local-name() = 'instance']/*[local-name()='collectedfrom']/@name)"/>
<FIELD indexable="true" name="originalid" result="false" stat="false" tokenizable="false" type="string_ci" xpath="//*[local-name()='entity']/*/*[local-name()='originalId']"/> <FIELD indexable="true" name="originalid" result="false" stat="false" tokenizable="false" type="string_ci" xpath="//*[local-name()='entity']/*/*[local-name()='originalId']"/>
<FIELD indexable="true" name="pid" result="false" stat="false" tokenizable="false" type="string_ci" xpath="distinct-values(//*[local-name()='entity']/*/pid/text()|//*[local-name()='instance']/*[local-name()='alternateidentifier']/text())"/> <FIELD indexable="true" name="pid" result="false" stat="false" tokenizable="false" type="string_ci" xpath="//*[local-name()='entity']/*/pid/text()"/>
<FIELD indexable="true" name="pidclassid" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*/pid/@classid)"/> <FIELD indexable="true" name="pidclassid" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*/pid/@classid)"/>
<FIELD indexable="true" name="pidclassname" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*/pid/@classname)"/> <FIELD indexable="true" name="pidclassname" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*/pid/@classname)"/>
<FIELD indexable="true" name="inferred" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']//datainfo/inferred"/> <FIELD indexable="true" name="inferred" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']//datainfo/inferred"/>
@ -156,20 +160,6 @@
<FIELD indexable="true" name="categoryname" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='result']/context/category/@label)"/> <FIELD indexable="true" name="categoryname" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='result']/context/category/@label)"/>
<FIELD indexable="true" name="conceptid" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='result']/context/category//concept/@id)"/> <FIELD indexable="true" name="conceptid" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='result']/context/category//concept/@id)"/>
<FIELD indexable="true" name="conceptname" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='result']/context/category//concept/@label)"/><!-- new index field for country info from different xpaths for any type of entity --> <FIELD indexable="true" name="conceptname" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='result']/context/category//concept/@label)"/><!-- new index field for country info from different xpaths for any type of entity -->
<FIELD indexable="true" name="country" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*/country/@classid | //*[local-name()='entity']/*//rel[./to/@type='organization']/country/@classid | //*[local-name()='entity']//funder/@jurisdiction)"/><!-- COUNTER FIELDS --> <FIELD indexable="true" name="country" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*/country/@classid | //*[local-name()='entity']/*//rel[./to/@type='organization']/country/@classid | //*[local-name()='entity']//funder/@jurisdiction)"/>
<FIELD header="true" indexable="true" multivalued="false" name="counter_dedup" result="false" stored="true" type="pint" xpath="/record/result/*[local-name()='header']/*[local-name()='counters']/counter_dedup/@value"/>
<FIELD header="true" indexable="true" multivalued="false" name="counter_authorship" result="false" stored="true" type="pint" xpath="/record/result/*[local-name()='header']/*[local-name()='counters']/counter_authorship/@value"/>
<FIELD header="true" indexable="true" multivalued="false" name="counter_participation" result="false" stored="true" type="pint" xpath="/record/result/*[local-name()='header']/*[local-name()='counters']/counter_participation/@value"/>
<FIELD header="true" indexable="true" multivalued="false" name="counter_similarity" result="false" stored="true" type="pint" xpath="/record/result/*[local-name()='header']/*[local-name()='counters']/counter_similarity/@value"/>
<FIELD header="true" indexable="true" multivalued="false" name="counter_publicationdataset" result="false" stored="true" type="pint" xpath="/record/result/*[local-name()='header']/*[local-name()='counters']/counter_publicationDataset/@value"/>
<FIELD header="true" indexable="true" multivalued="false" name="counter_publicationdataset_claimed" result="false" stored="true" type="pint" xpath="/record/result/*[local-name()='header']/*[local-name()='counters']/counter_publicationDataset_claimed/@value"/>
<FIELD header="true" indexable="true" multivalued="false" name="counter_publicationdataset_collected" result="false" stored="true" type="pint" xpath="/record/result/*[local-name()='header']/*[local-name()='counters']/counter_publicationDataset_collected/@value"/>
<FIELD header="true" indexable="true" multivalued="false" name="counter_publicationdataset_inferred" result="false" stored="true" type="pint" xpath="/record/result/*[local-name()='header']/*[local-name()='counters']/counter_publicationDataset_inferred/@value"/>
<FIELD header="true" indexable="true" multivalued="false" name="counter_outcome" result="false" stored="true" type="pint" xpath="/record/result/*[local-name()='header']/*[local-name()='counters']/counter_outcome/@value"/>
<FIELD header="true" indexable="true" multivalued="false" name="counter_outcome_claimed" result="false" stored="true" type="pint" xpath="/record/result/*[local-name()='header']/*[local-name()='counters']/counter_outcome_claimed/@value"/>
<FIELD header="true" indexable="true" multivalued="false" name="counter_outcome_collected" result="false" stored="true" type="pint" xpath="/record/result/*[local-name()='header']/*[local-name()='counters']/counter_outcome_collected/@value"/>
<FIELD header="true" indexable="true" multivalued="false" name="counter_outcome_inferred" result="false" stored="true" type="pint" xpath="/record/result/*[local-name()='header']/*[local-name()='counters']/counter_outcome_inferred/@value"/>
<FIELD header="true" indexable="true" multivalued="false" name="counter_affiliation" result="false" stored="true" type="pint" xpath="/record/result/*[local-name()='header']/*[local-name()='counters']/counter_affiliation/@value"/>
<FIELD header="true" indexable="true" multivalued="false" name="counter_doi" result="false" stored="true" type="pint" xpath="/record/result/*[local-name()='header']/*[local-name()='counters']/counter_doi/@value"/>
</FIELDS> </FIELDS>
</LAYOUT> </LAYOUT>

View File

@ -21,7 +21,7 @@
</property> </property>
<property> <property>
<name>hive_jdbc_url</name> <name>hive_jdbc_url</name>
<value>jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000</value> <value>jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000/;UseNativeQuery=1;?spark.executor.memory=19166291558;spark.yarn.executor.memoryOverhead=3225;spark.driver.memory=11596411699;spark.yarn.driver.memoryOverhead=1228</value>
</property> </property>
<property> <property>
<name>oozie.wf.workflow.notification.url</name> <name>oozie.wf.workflow.notification.url</name>

View File

@ -42,7 +42,9 @@ SELECT p.id,
CASE WHEN prr2.id IS NULL THEN 0 ELSE prr2.dp END AS delayedpubs, CASE WHEN prr2.id IS NULL THEN 0 ELSE prr2.dp END AS delayedpubs,
p.callidentifier, p.callidentifier,
p.code, p.code,
p.totalcost p.totalcost,
p.fundedamount,
p.currency
FROM ${stats_db_name}.project_tmp p FROM ${stats_db_name}.project_tmp p
LEFT JOIN (SELECT pr.id, count(distinct pr.result) AS np LEFT JOIN (SELECT pr.id, count(distinct pr.result) AS np
FROM ${stats_db_name}.project_results pr FROM ${stats_db_name}.project_results pr

View File

@ -59,7 +59,7 @@ UNION ALL
SELECT * FROM ${stats_db_name}.otherresearchproduct_sources; SELECT * FROM ${stats_db_name}.otherresearchproduct_sources;
create table ${stats_db_name}.result_orcid STORED AS PARQUET as CREATE TABLE IF NOT EXISTS ${stats_db_name}.result_orcid STORED AS PARQUET as
select distinct res.id, regexp_replace(res.orcid, 'http://orcid.org/' ,'') as orcid select distinct res.id, regexp_replace(res.orcid, 'http://orcid.org/' ,'') as orcid
from ( from (
SELECT substr(res.id, 4) as id, auth_pid.value as orcid SELECT substr(res.id, 4) as id, auth_pid.value as orcid
@ -69,7 +69,7 @@ from (
LATERAL VIEW explode(auth.pid.qualifier.classid) apt as author_pid_type LATERAL VIEW explode(auth.pid.qualifier.classid) apt as author_pid_type
WHERE res.datainfo.deletedbyinference = FALSE and res.datainfo.invisible = FALSE and author_pid_type = 'orcid') as res; WHERE res.datainfo.deletedbyinference = FALSE and res.datainfo.invisible = FALSE and author_pid_type = 'orcid') as res;
create table ${stats_db_name}.result_result stored as parquet as CREATE TABLE IF NOT EXISTS ${stats_db_name}.result_result stored as parquet as
select substr(rel.source, 4) as source, substr(rel.target, 4) as target, relclass, subreltype select substr(rel.source, 4) as source, substr(rel.target, 4) as target, relclass, subreltype
from ${openaire_db_name}.relation rel from ${openaire_db_name}.relation rel
join ${openaire_db_name}.result r1 on rel.source=r1.id join ${openaire_db_name}.result r1 on rel.source=r1.id
@ -82,7 +82,7 @@ where reltype='resultResult'
and r2.resulttype.classname != 'other' and r2.resulttype.classname != 'other'
and rel.datainfo.deletedbyinference=false and rel.datainfo.invisible = FALSE; and rel.datainfo.deletedbyinference=false and rel.datainfo.invisible = FALSE;
create table ${stats_db_name}.result_citations_oc stored as parquet as CREATE TABLE IF NOT EXISTS ${stats_db_name}.result_citations_oc stored as parquet as
select substr(target, 4) as id, count(distinct substr(source, 4)) as citations select substr(target, 4) as id, count(distinct substr(source, 4)) as citations
from ${openaire_db_name}.relation rel from ${openaire_db_name}.relation rel
join ${openaire_db_name}.result r1 on rel.source=r1.id join ${openaire_db_name}.result r1 on rel.source=r1.id
@ -97,7 +97,7 @@ where relClass='Cites' and rel.datainfo.provenanceaction.classid = 'sysimport:cr
and rel.datainfo.deletedbyinference=false and rel.datainfo.invisible = FALSE and rel.datainfo.deletedbyinference=false and rel.datainfo.invisible = FALSE
group by substr(target, 4); group by substr(target, 4);
create table ${stats_db_name}.result_references_oc stored as parquet as CREATE TABLE IF NOT EXISTS ${stats_db_name}.result_references_oc stored as parquet as
select substr(source, 4) as id, count(distinct substr(target, 4)) as references select substr(source, 4) as id, count(distinct substr(target, 4)) as references
from ${openaire_db_name}.relation rel from ${openaire_db_name}.relation rel
join ${openaire_db_name}.result r1 on rel.source=r1.id join ${openaire_db_name}.result r1 on rel.source=r1.id

View File

@ -42,7 +42,7 @@ join ${stats_db_name}.result res on res.id=r.id;
create table ${stats_db_name}.result_apc as create table ${stats_db_name}.result_apc as
select r.id, r.amount, r.currency select r.id, r.amount, r.currency
from ( from (
select substr(r.id, 4) as id, inst.processingchargeamount.value as amount, inst.processingchargecurrency.value as currency select substr(r.id, 4) as id, cast(inst.processingchargeamount.value as float) as amount, inst.processingchargecurrency.value as currency
from ${openaire_db_name}.result r lateral view explode(r.instance) instances as inst) r from ${openaire_db_name}.result r lateral view explode(r.instance) instances as inst) r
join ${stats_db_name}.result res on res.id=r.id join ${stats_db_name}.result res on res.id=r.id
where r.amount is not null; where r.amount is not null;

View File

@ -454,16 +454,16 @@ FROM publication_datasources pd
compute stats indi_pub_hybrid_oa_with_cc; compute stats indi_pub_hybrid_oa_with_cc;
create table indi_pub_downloads stored as parquet as create table indi_pub_downloads stored as parquet as
SELECT result_id, sum(downloads) no_dowloads from openaire_prod_usage_stats.usage_stats SELECT result_id, sum(downloads) no_downloads from openaire_prod_usage_stats.usage_stats
join publication on result_id=id join publication on result_id=id
where downloads>0 where downloads>0
GROUP BY result_id GROUP BY result_id
order by no_dowloads desc; order by no_downloads desc;
compute stats indi_pub_downloads; compute stats indi_pub_downloads;
create table indi_pub_downloads_datasource stored as parquet as create table indi_pub_downloads_datasource stored as parquet as
SELECT result_id, repository_id, sum(downloads) no_dowloads from openaire_prod_usage_stats.usage_stats SELECT result_id, repository_id, sum(downloads) no_downloads from openaire_prod_usage_stats.usage_stats
join publication on result_id=id join publication on result_id=id
where downloads>0 where downloads>0
GROUP BY result_id, repository_id GROUP BY result_id, repository_id
@ -472,7 +472,7 @@ order by result_id;
compute stats indi_pub_downloads_datasource; compute stats indi_pub_downloads_datasource;
create table indi_pub_downloads_year stored as parquet as create table indi_pub_downloads_year stored as parquet as
SELECT result_id, substring(us.`date`, 1,4) as `year`, sum(downloads) no_dowloads from openaire_prod_usage_stats.usage_stats us SELECT result_id, substring(us.`date`, 1,4) as `year`, sum(downloads) no_downloads from openaire_prod_usage_stats.usage_stats us
join publication on result_id=id where downloads>0 join publication on result_id=id where downloads>0
GROUP BY result_id, `year` GROUP BY result_id, `year`
order by `year` asc; order by `year` asc;
@ -480,7 +480,7 @@ order by `year` asc;
compute stats indi_pub_downloads_year; compute stats indi_pub_downloads_year;
create table indi_pub_downloads_datasource_year stored as parquet as create table indi_pub_downloads_datasource_year stored as parquet as
SELECT result_id, substring(us.`date`, 1,4) as `year`, repository_id, sum(downloads) no_dowloads from openaire_prod_usage_stats.usage_stats us SELECT result_id, substring(us.`date`, 1,4) as `year`, repository_id, sum(downloads) no_downloads from openaire_prod_usage_stats.usage_stats us
join publication on result_id=id join publication on result_id=id
where downloads>0 where downloads>0
GROUP BY result_id, repository_id, `year` GROUP BY result_id, repository_id, `year`

View File

@ -39,7 +39,6 @@ create table TARGET.result stored as parquet as
'openorgs____::5f31346d444a7f06a28c880fb170b0f6', --Ghent University 'openorgs____::5f31346d444a7f06a28c880fb170b0f6', --Ghent University
'openorgs____::2dbe47117fd5409f9c61620813456632', --University of Luxembourg 'openorgs____::2dbe47117fd5409f9c61620813456632', --University of Luxembourg
'openorgs____::6445d7758d3a40c4d997953b6632a368', --National Institute of Informatics (NII) 'openorgs____::6445d7758d3a40c4d997953b6632a368', --National Institute of Informatics (NII)
'openorgs____::b77c01aa15de3675da34277d48de2ec1', -- Valencia Catholic University Saint Vincent Martyr 'openorgs____::b77c01aa15de3675da34277d48de2ec1', -- Valencia Catholic University Saint Vincent Martyr
'openorgs____::7fe2f66cdc43983c6b24816bfe9cf6a0', -- University of Warsaw 'openorgs____::7fe2f66cdc43983c6b24816bfe9cf6a0', -- University of Warsaw
'openorgs____::15e7921fc50d9aa1229a82a84429419e', -- University Of Thessaly 'openorgs____::15e7921fc50d9aa1229a82a84429419e', -- University Of Thessaly
@ -224,18 +223,3 @@ create table TARGET.indi_result_with_pid stored as parquet as select * from SOUR
--create table TARGET.indi_software_gold_oa stored as parquet as select * from SOURCE.indi_software_gold_oa orig where exists (select 1 from TARGET.result r where r.id=orig.id); --create table TARGET.indi_software_gold_oa stored as parquet as select * from SOURCE.indi_software_gold_oa orig where exists (select 1 from TARGET.result r where r.id=orig.id);
--compute stats TARGET.indi_software_gold_oa; --compute stats TARGET.indi_software_gold_oa;
--denorm
alter table TARGET.result rename to TARGET.res_tmp;
create table TARGET.result_denorm stored as parquet as
select distinct r.*, rp.project, p.acronym as pacronym, p.title as ptitle, p.funder as pfunder, p.funding_lvl0 as pfunding_lvl0, rd.datasource, d.name as dname, d.type as dtype
from TARGET.res_tmp r
left outer join TARGET.result_projects rp on rp.id=r.id
left outer join TARGET.result_datasources rd on rd.id=r.id
left outer join TARGET.project p on p.id=rp.project
left outer join TARGET.datasource d on d.id=rd.datasource;
compute stats TARGET.result_denorm;
alter table TARGET.result_denorm rename to TARGET.result;
drop table TARGET.res_tmp;
--- done!

View File

@ -48,7 +48,9 @@ CREATE TABLE ${stats_db_name}.project_tmp
delayedpubs INT, delayedpubs INT,
callidentifier STRING, callidentifier STRING,
code STRING, code STRING,
totalcost FLOAT totalcost FLOAT,
fundedamount FLOAT,
currency STRING
) CLUSTERED BY (id) INTO 100 buckets stored AS orc tblproperties ('transactional' = 'true'); ) CLUSTERED BY (id) INTO 100 buckets stored AS orc tblproperties ('transactional' = 'true');
INSERT INTO ${stats_db_name}.project_tmp INSERT INTO ${stats_db_name}.project_tmp
@ -72,7 +74,9 @@ SELECT substr(p.id, 4) AS id,
0 AS delayedpubs, 0 AS delayedpubs,
p.callidentifier.value AS callidentifier, p.callidentifier.value AS callidentifier,
p.code.value AS code, p.code.value AS code,
p.totalcost AS totalcost p.totalcost AS totalcost,
p.fundedamount AS fundedamount,
p.currency.value AS currency
FROM ${openaire_db_name}.project p FROM ${openaire_db_name}.project p
WHERE p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; WHERE p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;