From 1699d41d39a5d70e681589b3cf03b9d61d1a9a06 Mon Sep 17 00:00:00 2001 From: miconis Date: Thu, 5 Nov 2020 15:48:42 +0100 Subject: [PATCH 01/55] relations for openorgs: not it choose only one master --- .../dhp/oa/dedup/SparkPrepareOrgRels.java | 50 ++++++++++--------- 1 file changed, 26 insertions(+), 24 deletions(-) diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkPrepareOrgRels.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkPrepareOrgRels.java index e9933c4e5..19e6127f6 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkPrepareOrgRels.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkPrepareOrgRels.java @@ -1,13 +1,9 @@ + package eu.dnetlib.dhp.oa.dedup; -import com.google.common.collect.Lists; -import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.oa.dedup.model.OrgSimRel; -import eu.dnetlib.dhp.schema.common.ModelSupport; -import eu.dnetlib.dhp.schema.oaf.Organization; -import eu.dnetlib.dhp.schema.oaf.Relation; -import eu.dnetlib.dhp.utils.ISLookupClientFactory; -import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; +import java.io.IOException; +import java.util.*; + import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.MapFunction; @@ -17,12 +13,19 @@ import org.apache.spark.sql.SaveMode; import org.apache.spark.sql.SparkSession; import org.slf4j.Logger; import org.slf4j.LoggerFactory; + +import com.google.common.collect.Lists; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.oa.dedup.model.OrgSimRel; +import eu.dnetlib.dhp.schema.common.ModelSupport; +import eu.dnetlib.dhp.schema.oaf.Organization; +import eu.dnetlib.dhp.schema.oaf.Relation; +import eu.dnetlib.dhp.utils.ISLookupClientFactory; +import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; import scala.Tuple2; import 
scala.Tuple3; -import java.io.IOException; -import java.util.*; - public class SparkPrepareOrgRels extends AbstractSparkAction { private static final Logger log = LoggerFactory.getLogger(SparkCreateDedupRecord.class); @@ -125,12 +128,11 @@ public class SparkPrepareOrgRels extends AbstractSparkAction { List ids = sortIds(l); List> rels = new ArrayList<>(); - for (String source : ids) { - if (source.contains("openorgs____") || ids.indexOf(source) == 0) - for (String target : ids) { - rels.add(new Tuple3<>(source, target, groupId)); - } + String source = ids.get(0); + for (String target : ids) { + rels.add(new Tuple3<>(source, target, groupId)); } + return rels.iterator(); }) .rdd(), @@ -235,14 +237,14 @@ public class SparkPrepareOrgRels extends AbstractSparkAction { .joinWith(entities, relations.col("_2").equalTo(entities.col("_1")), "inner") .map( (MapFunction, Tuple2>, OrgSimRel>) r -> new OrgSimRel( - r._1()._1(), - r._2()._2().getOriginalId().get(0), - r._2()._2().getLegalname() != null ? r._2()._2().getLegalname().getValue() : "", - r._2()._2().getLegalshortname() != null ? r._2()._2().getLegalshortname().getValue() : "", - r._2()._2().getCountry() != null ? r._2()._2().getCountry().getClassid() : "", - r._2()._2().getWebsiteurl() != null ? r._2()._2().getWebsiteurl().getValue() : "", - r._2()._2().getCollectedfrom().get(0).getValue(), - "group::" + r._1()._1()), + r._1()._1(), + r._2()._2().getOriginalId().get(0), + r._2()._2().getLegalname() != null ? r._2()._2().getLegalname().getValue() : "", + r._2()._2().getLegalshortname() != null ? r._2()._2().getLegalshortname().getValue() : "", + r._2()._2().getCountry() != null ? r._2()._2().getCountry().getClassid() : "", + r._2()._2().getWebsiteurl() != null ? 
r._2()._2().getWebsiteurl().getValue() : "", + r._2()._2().getCollectedfrom().get(0).getValue(), + "group::" + r._1()._1()), Encoders.bean(OrgSimRel.class)) .map( (MapFunction>) o -> new Tuple2<>(o.getLocal_id(), o), From 1cb79719a7295bd3fb0f145bc9bc9217d8f116bf Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 28 Sep 2022 11:44:55 +0200 Subject: [PATCH 02/55] [BulkTag] fixed issues --- .../dhp/bulktag/community/ResultTagger.java | 11 ++++++----- .../eu/dnetlib/dhp/bulktag/BulkTagJobTest.java | 15 +-------------- .../update_subject_datasource/dataset_10.json | 10 ++++++++++ .../update_subject_datasource/dataset_10.json.gz | Bin 6968 -> 0 bytes 4 files changed, 17 insertions(+), 19 deletions(-) create mode 100644 dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject_datasource/dataset_10.json delete mode 100644 dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject_datasource/dataset_10.json.gz diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java index ccb69a97d..9286c7385 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java @@ -152,7 +152,8 @@ public class ResultTagger implements Serializable { } result.getContext().forEach(c -> { - if (communities.contains(c.getId())) { + String cId = c.getId(); + if (communities.contains(cId)) { Optional> opt_dataInfoList = Optional.ofNullable(c.getDataInfo()); List dataInfoList; if (opt_dataInfoList.isPresent()) @@ -161,19 +162,19 @@ public class ResultTagger implements Serializable { dataInfoList = new ArrayList<>(); c.setDataInfo(dataInfoList); } - if (subjects.contains(c)) + if (subjects.contains(cId)) dataInfoList 
.add(OafMapperUtils.dataInfo(false, BULKTAG_DATA_INFO_TYPE, true, false, OafMapperUtils.qualifier(CLASS_ID_SUBJECT, CLASS_NAME_BULKTAG_SUBJECT, DNET_PROVENANCE_ACTIONS, DNET_PROVENANCE_ACTIONS), TAGGING_TRUST)); - if (datasources.contains(c)) + if (datasources.contains(cId)) dataInfoList .add(OafMapperUtils.dataInfo(false, BULKTAG_DATA_INFO_TYPE, true, false, OafMapperUtils.qualifier(CLASS_ID_DATASOURCE, CLASS_NAME_BULKTAG_DATASOURCE, DNET_PROVENANCE_ACTIONS, DNET_PROVENANCE_ACTIONS), TAGGING_TRUST)); - if (czenodo.contains(c)) + if (czenodo.contains(cId)) dataInfoList .add(OafMapperUtils.dataInfo(false, BULKTAG_DATA_INFO_TYPE, true, false, OafMapperUtils.qualifier(CLASS_ID_CZENODO, CLASS_NAME_BULKTAG_ZENODO, DNET_PROVENANCE_ACTIONS, DNET_PROVENANCE_ACTIONS), TAGGING_TRUST)); - if (aconstraints.contains(c)) + if (aconstraints.contains(cId)) dataInfoList .add( OafMapperUtils.dataInfo(false, BULKTAG_DATA_INFO_TYPE, true, false, diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/BulkTagJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/BulkTagJobTest.java index 0212f4a49..30d9a43b2 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/BulkTagJobTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/BulkTagJobTest.java @@ -21,6 +21,7 @@ import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.api.java.function.ForeachFunction; import org.apache.spark.sql.Encoders; import org.apache.spark.sql.Row; import org.apache.spark.sql.SparkSession; @@ -778,18 +779,4 @@ public class BulkTagJobTest { 1, idExplodeCommunity.filter("provenance = 'community:advconstraint'").count()); } -// @Test -// void test1(){ -// ProtoMap params = new Gson().fromJson(pathMap, ProtoMap.class); -// HashMap param = new HashMap<>(); -// for (String key : 
params.keySet()) { -// try { -// param.put(key, jsonContext.read(params.get(key))); -// } catch (com.jayway.jsonpath.PathNotFoundException e) { -// param.put(key, new ArrayList<>()); -// } -// } -// return param; -// } -// } } diff --git a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject_datasource/dataset_10.json b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject_datasource/dataset_10.json new file mode 100644 index 000000000..210719e29 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject_datasource/dataset_10.json @@ -0,0 +1,10 @@ +{"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1585055868909,"id":"50|od______3989::02dd5d2c222191b0b9bd4f33c8e96529","originalId":["od______3989::02dd5d2c222191b0b9bd4f33c8e96529"],"collectedfrom":[{"key":"opendoar____::3989","value":"Depósito Digital 
e-UCJC","dataInfo":null}],"pid":[{"value":"10.4185/RLCS-2018-1243","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"dateofcollection":"2020-03-03T13:05:26.091Z","dateoftransformation":"2020-03-03T13:06:53.161Z","extraInfo":[],"oaiprovenance":{"originDescription":{"harvestDate":"2020-03-03T13:05:26.091Z","altered":true,"baseURL":"http%3A%2F%2Frepositorio.ucjc.edu%2Foai%2Frequest","identifier":"oai:repositorio.ucjc.edu:20.500.12020/562","datestamp":"2018-01-23T15:06:07Z","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"author":[{"fullname":"Gallardo-Camacho, Jorge","name":"Jorge","surname":"Gallardo-Camacho","rank":1,"pid":null,"affiliation":null},{"fullname":"Trujillo Fernández, José Ramón","name":"José Ramón","surname":"Trujillo Fernández","rank":2,"pid":null,"affiliation":null},{"fullname":"Jorge Alonso, Ana","name":"Ana","surname":"Jorge 
Alonso","rank":3,"pid":null,"affiliation":null}],"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":{"classid":"esl/spa","classname":"Spanish","schemeid":"dnet:languages","schemename":"dnet:languages"},"country":[],"subject":[{"value":"COVID-19","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"yihadismo","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"viralidad","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"vídeo","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"propaganda","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,
"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"terrorismo","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"terrorism","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"jihadism","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"virality","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"video","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust
":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"5903 Ideologías Políticas","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"6310.13 Terrorismo","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"title":[{"value":"El individualismo como estrategia","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Individualism as an improvised strategy","qualifier":{"classid":"main title","classname":"main 
title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"relevantdate":[],"description":[{"value":"Se analiza como el Yihadismo distribuye su mensaje propaganístico en You Tube.\nUniversidad Camilo José Cela","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"dateofacceptance":{"value":"2018-01-01","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"publisher":null,"embargoenddate":null,"source":[],"fulltext":[],"format":[],"contributor":[{"value":"MediaLab - UCJC","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"resourcetype":null,"coverage":[],"bestaccessright":null,"context":[{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"propagation","inferred":true,"invisible":false,"provenanceaction":{"classid":"propagation:community:productsthroughsemrel","classname":" Propagation of result belonging to community through semantic 
relation","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.85"}],"id":"ee"},{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"propagation","inferred":true,"invisible":false,"provenanceaction":{"classid":"propagation:community:productsthroughsemrel","classname":" Propagation of result belonging to community through semantic relation","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.85"}],"id":"fam"},{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"propagation","inferred":true,"invisible":false,"provenanceaction":{"classid":"propagation:community:productsthroughsemrel","classname":" Propagation of result belonging to community through semantic relation","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.85"}],"id":"covid-19"}],"externalReference":[],"instance":[{"license":{"value":"http://creativecommons.org/licenses/by-nc-nd/4.0/","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"instancetype":{"classid":"0024","classname":"Film","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"10|doajarticles::8cec81178926caaca531afbd8eb5d64c","value":"Depósito Digital e-UCJC","dataInfo":null},"url":["http://hdl.handle.net/20.500.12020/562"],"distributionlocation":"","collectedfrom":{"key":"opendoar____::3989","value":"Depósito Digital 
e-UCJC","dataInfo":null},"dateofacceptance":{"value":"2018-01-01","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null}],"storagedate":null,"device":null,"size":null,"version":null,"lastmetadataupdate":null,"metadataversionnumber":null,"geolocation":[]} +{"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1585055868954,"id":"50|od______3989::05d8c751462f9bb8d2b06956dfbc5c7b","originalId":["od______3989::05d8c751462f9bb8d2b06956dfbc5c7b"],"collectedfrom":[{"key":"opendoar____::3989","value":"Depósito Digital e-UCJC","dataInfo":null}],"pid":[],"dateofcollection":"2020-03-03T13:05:26.079Z","dateoftransformation":"2020-03-03T13:06:53.239Z","extraInfo":[],"oaiprovenance":{"originDescription":{"harvestDate":"2020-03-03T13:05:26.079Z","altered":true,"baseURL":"http%3A%2F%2Frepositorio.ucjc.edu%2Foai%2Frequest","identifier":"oai:repositorio.ucjc.edu:20.500.12020/227","datestamp":"2017-11-15T16:29:55Z","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"author":[{"fullname":"Rico Pérez, Marta","name":"Marta","surname":"Rico Pérez","rank":1,"pid":null,"affiliation":null}],"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":{"classid":"esl/spa","classname":"Spanish","schemeid":"dnet:languages","schemename":"dnet:languages"},"country":[],"subject":[{"value":"coronavirus 
disease-19","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"agriculture","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Retos","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Deporte","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"title":[{"value":"I Congreso Nuevos Retos en los Eventos Deportivos","qualifier":{"classid":"main title","classname":"main 
title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"relevantdate":[],"description":[{"value":"I Congreso Nuevos Retos en los Eventos Deportivos, celebrado en la Universidad Camilo José Cela.\n\n\n\nDerivado del proyecto de investigación \"La situación de los eventos deportivos en España a través de su estructuras y de sus herramientas\", tiene como finalidad conocer la situación de los eventos deportivos en España. Se realizó un estudio descriptivo holístico en el que se investigaron elementos culturales, el perfil de los recursos humanos, la estructura de los comités, entre otras.","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"dateofacceptance":{"value":"2015-06-20","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"publisher":null,"embargoenddate":null,"source":[],"fulltext":[],"format":[],"contributor":[],"resourcetype":null,"coverage":[],"bestaccessright":null,"context":[],"externalReference":[],"instance":[{"license":{"value":"http://creativecommons.org/licenses/by-nc-nd/4.0/","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","
classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"instancetype":{"classid":"0024","classname":"Film","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"10|opendoar____::1a551829d50f1400b0dab21fdd969c04","value":"Depósito Digital e-UCJC","dataInfo":null},"url":["http://hdl.handle.net/20.500.12020/227"],"distributionlocation":"","collectedfrom":{"key":"opendoar____::3989","value":"Depósito Digital e-UCJC","dataInfo":null},"dateofacceptance":{"value":"2015-06-20","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null}],"storagedate":null,"device":null,"size":null,"version":null,"lastmetadataupdate":null,"metadataversionnumber":null,"geolocation":[]} +{"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1585055869097,"id":"50|od______3989::0f89464c4ac4c398fe0c71433b175a62","originalId":["od______3989::0f89464c4ac4c398fe0c71433b175a62"],"collectedfrom":[{"key":"opendoar____::3989","value":"Depósito Digital 
e-UCJC","dataInfo":null}],"pid":[],"dateofcollection":"2020-03-03T13:05:26.069Z","dateoftransformation":"2020-03-03T13:06:53.54Z","extraInfo":[],"oaiprovenance":{"originDescription":{"harvestDate":"2020-03-03T13:05:26.069Z","altered":true,"baseURL":"http%3A%2F%2Frepositorio.ucjc.edu%2Foai%2Frequest","identifier":"oai:repositorio.ucjc.edu:20.500.12020/802","datestamp":"2018-11-14T15:30:47Z","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"author":[{"fullname":"Universidad Camilo José Cela, Ciencias de la Actividad Física y del Deporte","name":"Ciencias La Actividad Física Y. Del Deporte","surname":"Universidad Camilo José Cela","rank":1,"pid":null,"affiliation":null}],"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":{"classid":"esl/spa","classname":"Spanish","schemeid":"dnet:languages","schemename":"dnet:languages"},"country":[],"subject":[{"value":"fishery","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"sea","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Impacto 
físico","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Proyectos de investigación","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"2411.06 Fisiología del Ejercicio","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"title":[{"value":"Investigaciones en Ciencias de la Actividad Física y del Deporte CCAFD - UCJC","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"relevantdate":[],"description":[{"value":"Blanca Romero, investigadora y docente de Ciencias de la Actividad Física y del Deporte en la Universidad Camilo José Cela, describe dos de las líneas de investigación que se han desarrollado en el departamento CCAFD de UCJC: el 
impacto físico de las diferentes actividades deportivas y los métodos de recuperación más eficaces tras el ejercicio físico.\nBlanca también destaca la importancia en el ámbito de la salud los estudios en Ciencias de la Actividad Física y del Deporte.\nCiencias de la Actividad Física y del Deporte en la Universidad Camilo José Cela","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"dateofacceptance":{"value":"2016-03-21","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"publisher":null,"embargoenddate":null,"source":[],"fulltext":[],"format":[],"contributor":[{"value":"Universidad Camilo José Cela, Ciencias de la Actividad Física y del Deporte","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"resourcetype":null,"coverage":[],"bestaccessright":null,"context":[],"externalReference":[],"instance":[{"license":{"value":"http://creativecommons.org/licenses/by-nc-nd/4.0/","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"accessright":{"classid":"OPEN","classname":"Open 
Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"instancetype":{"classid":"0024","classname":"Film","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"opendoar____::3989","value":"Depósito Digital e-UCJC","dataInfo":null},"url":["http://hdl.handle.net/20.500.12020/802"],"distributionlocation":"","collectedfrom":{"key":"opendoar____::3989","value":"Depósito Digital e-UCJC","dataInfo":null},"dateofacceptance":{"value":"2016-03-21","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null}],"storagedate":null,"device":null,"size":null,"version":null,"lastmetadataupdate":null,"metadataversionnumber":null,"geolocation":[]} +{"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1585055869215,"id":"50|od______3989::1729c3988199b95d1d566851af7d3c55","originalId":["od______3989::1729c3988199b95d1d566851af7d3c55"],"collectedfrom":[{"key":"opendoar____::3989","value":"Depósito Digital 
e-UCJC","dataInfo":null}],"pid":[],"dateofcollection":"2020-03-03T13:05:25.653Z","dateoftransformation":"2020-03-03T13:06:53.764Z","extraInfo":[],"oaiprovenance":{"originDescription":{"harvestDate":"2020-03-03T13:05:25.653Z","altered":true,"baseURL":"http%3A%2F%2Frepositorio.ucjc.edu%2Foai%2Frequest","identifier":"oai:repositorio.ucjc.edu:20.500.12020/801","datestamp":"2018-11-14T15:19:38Z","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"author":[{"fullname":"Universidad Camilo José Cela, Ciencias de la Actividad Física y del Deporte","name":"Ciencias La Actividad Física Y. Del Deporte","surname":"Universidad Camilo José Cela","rank":1,"pid":null,"affiliation":null}],"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":{"classid":"esl/spa","classname":"Spanish","schemeid":"dnet:languages","schemename":"dnet:languages"},"country":[],"subject":[{"value":"Ciencias de la Actividad Física y del 
Deporte","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Prevención","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Hidratación","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Deporte","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Deportistas","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Electrolitos","qualifier":{"classid":"","cla
ssname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Deportes de resistencia","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"2411.06 Fisiología del Ejercicio","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"title":[{"value":"La importancia de la hidratación en competiciones resistencia, por Juan del Coso","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"relevantdate":[],"description":[{"value":"Juan del Coso, investigador y profesor en el Grado en Ciencias de la Actividad Física y del Deporte UCJC, presenta el proyecto de investigación que ha realizado el departamento CCAFD de UCJC: la prevención de desequilibrios de agua y electrolitos que se dan en los 
deportes de resistencia para que los deportistas realicen su actividad de una manera más segura.\nMás información: http://www.ucjc.edu/2016/03/investiga...\nUniversidad Camilo José Cela, Ciencias de la Actividad Física y del Deporte","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"dateofacceptance":{"value":"2016-03-21","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"publisher":null,"embargoenddate":null,"source":[],"fulltext":[],"format":[],"contributor":[{"value":"Grado en Ciencias de la Actividad Física y del Deporte UCJC","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"resourcetype":null,"coverage":[],"bestaccessright":null,"context":[],"externalReference":[],"instance":[{"license":{"value":"http://creativecommons.org/licenses/by-nc-nd/4.0/","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"accessright":{"classid":"OPEN","classname":"Open 
Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"instancetype":{"classid":"0024","classname":"Film","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"opendoar____::3989","value":"Depósito Digital e-UCJC","dataInfo":null},"url":["http://hdl.handle.net/20.500.12020/801"],"distributionlocation":"","collectedfrom":{"key":"opendoar____::3989","value":"Depósito Digital e-UCJC","dataInfo":null},"dateofacceptance":{"value":"2016-03-21","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null}],"storagedate":null,"device":null,"size":null,"version":null,"lastmetadataupdate":null,"metadataversionnumber":null,"geolocation":[]} +{"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1585055869487,"id":"50|od______3989::2e3f34ce90520fae350a7e1148d7dcea","originalId":["od______3989::2e3f34ce90520fae350a7e1148d7dcea"],"collectedfrom":[{"key":"opendoar____::3989","value":"Depósito Digital 
e-UCJC","dataInfo":null}],"pid":[],"dateofcollection":"2020-03-03T13:05:34.912Z","dateoftransformation":"2020-03-03T13:06:54.218Z","extraInfo":[],"oaiprovenance":{"originDescription":{"harvestDate":"2020-03-03T13:05:34.912Z","altered":true,"baseURL":"http%3A%2F%2Frepositorio.ucjc.edu%2Foai%2Frequest","identifier":"oai:repositorio.ucjc.edu:20.500.12020/800","datestamp":"2018-11-14T13:43:21Z","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"author":[{"fullname":"Universidad Camilo José Cela, Vicerrectorado de Innovación","name":"Vicerrectorado Innovación","surname":"Universidad Camilo José Cela","rank":1,"pid":null,"affiliation":null}],"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":{"classid":"esl/spa","classname":"Spanish","schemeid":"dnet:languages","schemename":"dnet:languages"},"country":[],"subject":[{"value":"Ciencias de la Actividad Física y del Deporte","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Deporte","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Cafeína","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk
:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Efectos","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Alto rendimiento","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Competiciones","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Beneficios","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"2411.06 Fisiología del 
Ejercicio","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"title":[{"value":"Investigación - Efectos positivos y negativos del uso de la cafeína en el deporte","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"relevantdate":[],"description":[{"value":"Proyecto de investigación que se están desarrollando en la Universidad Camilo José Cela (UCJC) sobre la cafeína y sus efectos cuando se realiza deporte. 
https://www.ucjc.edu/2016/08/la-bbc-s...\n\nJuan Del Coso, director del Laboratorio de Fisiología del Ejercicio de la Universidad Camilo José Cela, nos describe cómo está siendo la investigación que desarrolla con deportistas de alto rendimiento y el consumo de cafeína.\n\n\"Nuestra investigación se enmarca dentro del campo de la fisiología del ejercicio y el objetivo que tenemos es investigar los efectos que tiene la cafeína en los deportistas, principalmente saber si es una ayuda ergogénica en la mayor parte de los deportes, pero también conocer qué perjuicios existen para aquellos deportistas que la utilizan para incrementar el rendimiento deportivo\", explica Del Coso.\n\nY es que los datos son muy llamativos: tres de cada cuatro deportistas de alto rendimiento utilizan la cafeína antes de la competición y muchas veces se utiliza sin saber los perjuicios que puede tener para el deportista. \"Lo que estamos investigando es cómo una sustancia, que te puede hacer rendir mejor, cómo puede afectar negativamente cuando termina una competición\".\n\nPero esta investigación no sólo puede extraer conclusiones interesantes y útiles para los deportistas profesionales, \"investigando el deporte de alto rendimiento podemos conocer los beneficios y perjuicios de la cafeína que también afectarían al deportista amateur y hacer un deporte en general más seguro\".\n\nPara el desarrollo de este proyecto de investigación están colaborando el Laboratorio de Fisiología del Ejercicio de la Universidad Camilo José Cela y la Agencia para la Protección de la Salud en el Deporte.\nPara el desarrollo de este proyecto de investigación están colaborando el Laboratorio de Fisiología del Ejercicio de la Universidad Camilo José Cela y la Agencia para la Protección de la Salud en el 
Deporte.","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"dateofacceptance":{"value":"2017-10-06","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"publisher":null,"embargoenddate":null,"source":[],"fulltext":[],"format":[],"contributor":[{"value":"Universidad Camilo José Cela, Vicerrectorado de Innovación","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"resourcetype":null,"coverage":[],"bestaccessright":null,"context":[],"externalReference":[],"instance":[{"license":{"value":"http://creativecommons.org/licenses/by-nc-nd/4.0/","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"instancetype":{"classid":"0024","classname":"Film","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"opendoar____::3989","value":"Depósito Digital 
e-UCJC","dataInfo":null},"url":["http://hdl.handle.net/20.500.12020/800"],"distributionlocation":"","collectedfrom":{"key":"opendoar____::3989","value":"Depósito Digital e-UCJC","dataInfo":null},"dateofacceptance":{"value":"2017-10-06","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null}],"storagedate":null,"device":null,"size":null,"version":null,"lastmetadataupdate":null,"metadataversionnumber":null,"geolocation":[]} +{"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1585055869498,"id":"50|od______3989::2f4f3c820c450bd08dac08d07cc82dcf","originalId":["od______3989::2f4f3c820c450bd08dac08d07cc82dcf"],"collectedfrom":[{"key":"opendoar____::3989","value":"Depósito Digital e-UCJC","dataInfo":null}],"pid":[],"dateofcollection":"2020-03-03T13:05:34.806Z","dateoftransformation":"2020-03-03T13:06:54.229Z","extraInfo":[],"oaiprovenance":{"originDescription":{"harvestDate":"2020-03-03T13:05:34.806Z","altered":true,"baseURL":"http%3A%2F%2Frepositorio.ucjc.edu%2Foai%2Frequest","identifier":"oai:repositorio.ucjc.edu:20.500.12020/797","datestamp":"2018-11-14T12:12:23Z","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"author":[{"fullname":"Cuesta Cano, Laura","name":"Laura","surname":"Cuesta 
Cano","rank":1,"pid":null,"affiliation":null}],"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":{"classid":"esl/spa","classname":"Spanish","schemeid":"dnet:languages","schemename":"dnet:languages"},"country":[],"subject":[{"value":"Ciencias de la Comunicación","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Marketing digital","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Medios 
sociales","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Marcas","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Empresas","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Emprendedores","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"II Liga de Debate CICAE - UCJC","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"6114.13 
Marketing","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"title":[{"value":"Laura Cuesta - Liga de Debate CICAE - UCJC","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"relevantdate":[],"description":[{"value":"Laura Cuesta, profesora en la Universidad Camilo José Cela (UCJC) y Especialista en Marketing Digital y Medios Sociales para marcas y emprendedores. \n\nEn esta ocasión, Laura Cuesta, nos explica el significado de redes sociales y nos habla sobre el uso que le dan los usuarios.\n\nII Liga de Debate CICAE - UCJC http://ligacicae.ucjc.edu/\n\nLa Universidad Camilo José Cela (UCJC) y la Asociación de Colegios Privados e Independientes –Círculo de Calidad Educativa (CICAE) – organizan la segunda edición de esta gran liga de debate académico. El formato es al estilo inglés, similar al de la Oxford Union, donde algunos jóvenes se reunían a debatir sobre temas de la más candente actualidad y polemizaban con la intención de formarse en tan valoradas habilidades.\n\nLa Liga de Debate CICAE-UCJC es una ocasión única para los alumnos de ponerse en contacto con herramientas tan útiles como la oratoria y la comunicación efectiva. Pero, sobre todo, una oportunidad de fomentar el pensamiento crítico y la socialización entre alumnos con espíritu emprendedor. 
La Liga de Debate CICAE - Universidad Camilo José Cela se diferencia del resto de torneos en que potenciamos que sea eminentemente formativa. Alumnos y profesores reciben formación en comunicación, oratoria y debate por parte de alguno de los expertos en debate españoles más reconocidos en la escena internacional.\n\nDatos de la III Liga de Debate CICAE - UCJC http://ligacicae.ucjc.edu/","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"dateofacceptance":{"value":"2018-10-23","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"publisher":null,"embargoenddate":null,"source":[],"fulltext":[],"format":[],"contributor":[{"value":"Universidad Camilo José Cela, Vicerrectorado de 
Innovación","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"resourcetype":null,"coverage":[],"bestaccessright":null,"context":[],"externalReference":[],"instance":[{"license":{"value":"http://creativecommons.org/licenses/by-nc-nd/4.0/","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"instancetype":{"classid":"0024","classname":"Film","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"opendoar____::3989","value":"Depósito Digital e-UCJC","dataInfo":null},"url":["http://hdl.handle.net/20.500.12020/797"],"distributionlocation":"","collectedfrom":{"key":"opendoar____::3989","value":"Depósito Digital e-UCJC","dataInfo":null},"dateofacceptance":{"value":"2018-10-23","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null}],"storagedate":null,"device":null,"size":null,"version":null,"lastmetadataupdate":null,"metadataversionnumber":null,"geolocation":[]} 
+{"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1585055870582,"id":"50|od______3989::752fd0b2bbac1ea1cc50e52fd46eb663","originalId":["od______3989::752fd0b2bbac1ea1cc50e52fd46eb663"],"collectedfrom":[{"key":"opendoar____::3989","value":"Depósito Digital e-UCJC","dataInfo":null}],"pid":[],"dateofcollection":"2020-03-03T13:05:34.561Z","dateoftransformation":"2020-03-03T13:06:55.716Z","extraInfo":[],"oaiprovenance":{"originDescription":{"harvestDate":"2020-03-03T13:05:34.561Z","altered":true,"baseURL":"http%3A%2F%2Frepositorio.ucjc.edu%2Foai%2Frequest","identifier":"oai:repositorio.ucjc.edu:20.500.12020/798","datestamp":"2018-11-14T12:57:26Z","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"author":[{"fullname":"Universidad Camilo José Cela, Vicerrectorado de Innovación","name":"Vicerrectorado Innovación","surname":"Universidad Camilo José 
Cela","rank":1,"pid":null,"affiliation":null}],"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":{"classid":"esl/spa","classname":"Spanish","schemeid":"dnet:languages","schemename":"dnet:languages"},"country":[],"subject":[{"value":"Psicología","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Alcohol","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Dependencia","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Proyectos de 
investigación","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Alcoholismo","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Realidad virtual","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Psicología conductual","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"title":[{"value":"Investigación - Intervención basada en realidad virtual sobre la dependencia del alcohol","qualifier":{"classid":"main title","classname":"main 
title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"relevantdate":[],"description":[{"value":"Proyecto de investigación que se están desarrollando en la Universidad Camilo José Cela (UCJC) con tecnología aplicada al abandono de la dependencia del alcohol. \nRosa Jurado, profesora e investigadora de UCJC https://www.ucjc.edu/, explica cómo se conjuga el alcoholismo con la realidad virtual. \"Mi investigación consiste en desarrollar una técnica de intervención o rehabilitación que tiene como propósito fomentar la capacidad de inhibición para que las personas que tienen dependencia del alcohol sean capaces de controlar su conducta de aproximación hacia aquello que tiene que ver con el consumo de alcohol\".\n\nEn esta investigación, liderada por la Universidad Camilo José Cela, están participando, el Instituto de Investigación Biomédica del Hospital 12 de Octubre y la Universidad Politécnica de Madrid, con el CeDint. \"En nuestro equipo de investigación pensamos que, además del deseo de consumo, existen una carencia clara de capacidad de control inhibitorio en este tipo de personas\", destaca Rosa Jurado.\n\nLa realidad virtual sirve, según explica Gabriel Rubio, jefe de Psiquiatría del Hospital 12 de Octubre, \"para dar un paso más, para ver cómo reacciona un sujeto en un ambiente que no es el real, pero se parece mucho al real\". El objetivo final de estas acciones es \"mejorar la capacidad del individuo para poner en marcha mecanismos de inhibición conductual, para que el sujeto sea capaz de decir que no\".\n\nY no acaba ahí el proceso. 
La fase final del tratamiento se enfoca en que el paciente \"sea capaz de reconstruir su vida\" a través de programas de entrenamiento de habilidades sociales, asertividad, familias... \"Juntando todas las áreas de intervención, la abstinencia se mantiene mucho más tiempo y las recaídas disminuyen\".","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"dateofacceptance":{"value":"2017-10-05","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"publisher":null,"embargoenddate":null,"source":[],"fulltext":[],"format":[],"contributor":[{"value":"Universidad Camilo José Cela, Vicerrectorado de Innovación","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"resourcetype":null,"coverage":[],"bestaccessright":null,"context":[],"externalReference":[],"instance":[{"license":{"value":"http://creativecommons.org/licenses/by-nc-nd/4.0/","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"accessright":{"classid":"OPEN","classname":"Open 
Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"instancetype":{"classid":"0024","classname":"Film","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"opendoar____::3989","value":"Depósito Digital e-UCJC","dataInfo":null},"url":["http://hdl.handle.net/20.500.12020/798"],"distributionlocation":"","collectedfrom":{"key":"opendoar____::3989","value":"Depósito Digital e-UCJC","dataInfo":null},"dateofacceptance":{"value":"2017-10-05","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null}],"storagedate":null,"device":null,"size":null,"version":null,"lastmetadataupdate":null,"metadataversionnumber":null,"geolocation":[]} +{"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1585055870750,"id":"50|od______3989::7fcbe3a03280663cddebfd3cb9203177","originalId":["od______3989::7fcbe3a03280663cddebfd3cb9203177"],"collectedfrom":[{"key":"opendoar____::3989","value":"Depósito Digital 
e-UCJC","dataInfo":null}],"pid":[],"dateofcollection":"2020-03-03T13:05:25.652Z","dateoftransformation":"2020-03-03T13:06:55.95Z","extraInfo":[],"oaiprovenance":{"originDescription":{"harvestDate":"2020-03-03T13:05:25.652Z","altered":true,"baseURL":"http%3A%2F%2Frepositorio.ucjc.edu%2Foai%2Frequest","identifier":"oai:repositorio.ucjc.edu:20.500.12020/796","datestamp":"2018-11-05T13:36:54Z","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"author":[{"fullname":"Universidad Camilo José Cela, Vicerrectorado de Innovación","name":"Vicerrectorado Innovación","surname":"Universidad Camilo José Cela","rank":1,"pid":null,"affiliation":null}],"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":{"classid":"esl/spa","classname":"Spanish","schemeid":"dnet:languages","schemename":"dnet:languages"},"country":[],"subject":[{"value":"Ciencias de la Comunicación","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Redes 
sociales","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Comunicación","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Información","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Desinformación","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"III Liga de Debate 
CICAE","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Instantaneidad","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Información periodística","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Fake news","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"5910.02 Medios de Comunicación de 
Masas","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"title":[{"value":"Jorge Gallardo - Liga de Debate CICAE","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"relevantdate":[],"description":[{"value":"Jorge Gallardo, profesor de Derecho en la Universidad Camilo José Cela (UCJC), Doctor en Comunicación Audiovisual y MBA en Empresas Audiovisuales. Subdirector de Espejo Público en Antena 3 Noticias. \n\nEn esta ocasión, Jorge Gallardo habla sobre las redes sociales, cómo han transformado la manera en la que nos comunicamos y nos informamos. \n\nIII Liga de Debate CICAE - UCJC http://ligacicae.ucjc.edu/\n\nLa Universidad Camilo José Cela (UCJC) y la Asociación de Colegios Privados e Independientes –Círculo de Calidad Educativa (CICAE) – organizan la tercera edición de esta gran liga de debate académico. El formato es al estilo inglés, similar al de la Oxford Union, donde algunos jóvenes se reunían a debatir sobre temas de la más candente actualidad y polemizaban con la intención de formarse en tan valoradas habilidades.\n\nLa Liga de Debate CICAE-UCJC es una ocasión única para los alumnos de ponerse en contacto con herramientas tan útiles como la oratoria y la comunicación efectiva. 
Pero, sobre todo, una oportunidad de fomentar el pensamiento crítico y la socialización entre alumnos con espíritu emprendedor. La Liga de Debate CICAE - Universidad Camilo José Cela se diferencia del resto de torneos en que potenciamos que sea eminentemente formativa. Alumnos y profesores reciben formación en comunicación, oratoria y debate por parte de alguno de los expertos en debate españoles más reconocidos en la escena internacional.\n\nDatos de la III Liga de Debate CICAE - UCJC http://ligacicae.ucjc.edu/","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"dateofacceptance":{"value":"2018-10-23","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"publisher":null,"embargoenddate":null,"source":[],"fulltext":[],"format":[],"contributor":[],"resourcetype":null,"coverage":[],"bestaccessright":null,"context":[],"externalReference":[],"instance":[{"license":{"value":"http://creativecommons.org/licenses/by-nc-nd/4.0/","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"accessright":{"classid":"OPEN","classname":"Open 
Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"instancetype":{"classid":"0024","classname":"Film","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"opendoar____::3989","value":"Depósito Digital e-UCJC","dataInfo":null},"url":["http://hdl.handle.net/20.500.12020/796"],"distributionlocation":"","collectedfrom":{"key":"opendoar____::3989","value":"Depósito Digital e-UCJC","dataInfo":null},"dateofacceptance":{"value":"2018-10-23","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null}],"storagedate":null,"device":null,"size":null,"version":null,"lastmetadataupdate":null,"metadataversionnumber":null,"geolocation":[]} +{"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1585055872001,"id":"50|od______3989::d791339867bec6d3eb2104deeb4e4961","originalId":["od______3989::d791339867bec6d3eb2104deeb4e4961"],"collectedfrom":[{"key":"opendoar____::3989","value":"Depósito Digital 
e-UCJC","dataInfo":null}],"pid":[],"dateofcollection":"2020-03-03T13:05:34.586Z","dateoftransformation":"2020-03-03T13:06:57.721Z","extraInfo":[],"oaiprovenance":{"originDescription":{"harvestDate":"2020-03-03T13:05:34.586Z","altered":true,"baseURL":"http%3A%2F%2Frepositorio.ucjc.edu%2Foai%2Frequest","identifier":"oai:repositorio.ucjc.edu:20.500.12020/645","datestamp":"2018-04-12T07:59:27Z","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"author":[{"fullname":"Universidad Camilo José Cela, Vicerrectorado de Innovación","name":"Vicerrectorado Innovación","surname":"Universidad Camilo José Cela","rank":1,"pid":null,"affiliation":null}],"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":{"classid":"esl/spa","classname":"Spanish","schemeid":"dnet:languages","schemename":"dnet:languages"},"country":[],"subject":[{"value":"Transferencia de Conocimiento e Innovación","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Innovación","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Investigación","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:cro
sswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Ciencia","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Tecnología","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"title":[{"value":"UCJC Open Science Day 2018","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"relevantdate":[],"description":[{"value":"El UCJC Open Science Day tiene como objetivo mostrar las actividades de investigación llevadas a cabo por investigadores de la Universidad Camilo José Cela (UCJC) a todos los miembros de la 
UCJC.","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"dateofacceptance":{"value":"2018-04-10","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"publisher":null,"embargoenddate":null,"source":[],"fulltext":[],"format":[],"contributor":[],"resourcetype":null,"coverage":[],"bestaccessright":null,"context":[],"externalReference":[],"instance":[{"license":{"value":"http://creativecommons.org/licenses/by-nc-nd/4.0/","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"instancetype":{"classid":"0024","classname":"Film","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"opendoar____::3989","value":"Depósito Digital e-UCJC","dataInfo":null},"url":["http://hdl.handle.net/20.500.12020/645"],"distributionlocation":"","collectedfrom":{"key":"opendoar____::3989","value":"Depósito Digital 
e-UCJC","dataInfo":null},"dateofacceptance":{"value":"2018-04-10","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null}],"storagedate":null,"device":null,"size":null,"version":null,"lastmetadataupdate":null,"metadataversionnumber":null,"geolocation":[]} +{"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1585055872022,"id":"50|od______3989::d90d3a1f64ad264b5ebed8a35b280343","originalId":["od______3989::d90d3a1f64ad264b5ebed8a35b280343"],"collectedfrom":[{"key":"opendoar____::3989","value":"Depósito Digital e-UCJC","dataInfo":null}],"pid":[],"dateofcollection":"2020-03-03T13:05:34.912Z","dateoftransformation":"2020-03-03T13:06:57.747Z","extraInfo":[],"oaiprovenance":{"originDescription":{"harvestDate":"2020-03-03T13:05:34.912Z","altered":true,"baseURL":"http%3A%2F%2Frepositorio.ucjc.edu%2Foai%2Frequest","identifier":"oai:repositorio.ucjc.edu:20.500.12020/795","datestamp":"2018-11-06T15:43:10Z","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"author":[{"fullname":"Universidad Camilo José Cela, Vicerrectorado de Innovación","name":"Vicerrectorado Innovación","surname":"Universidad Camilo José 
Cela","rank":1,"pid":null,"affiliation":null}],"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":{"classid":"esl/spa","classname":"Spanish","schemeid":"dnet:languages","schemename":"dnet:languages"},"country":[],"subject":[{"value":"Ciencias de la Comunicación","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Unión Europea","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Desinformación","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Política","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Periodismo","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"
dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Ciudadanos europeos","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Democracias","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Estado de derecho","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Derechos humanos","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"5910.02 Medios de Comunicación de 
Masas","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"title":[{"value":"Ramón Luis Varcárcel - La Unión Europea ante el reto de la desinformación","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"relevantdate":[],"description":[{"value":"Conferencia de Ramón Luis Valcárcel, vicepresidente del Parlamento Europeo, en el Campus de Almagro de la Universidad Camilo José Cela (UCJC) bajo el título \"La Unión Europea ante el reto de la desinformación\", en la que ha alertado sobre el alcance de la desinformación en las democracias y sobre cómo pone en peligro los derechos de los ciudadanos europeos. 
También ha participado el rector de la UCJC, Samuel Martín-Barbero.","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"dateofacceptance":{"value":"2018-10-11","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"publisher":null,"embargoenddate":null,"source":[],"fulltext":[],"format":[],"contributor":[],"resourcetype":null,"coverage":[],"bestaccessright":null,"context":[],"externalReference":[],"instance":[{"license":{"value":"http://creativecommons.org/licenses/by-nc-nd/4.0/","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"instancetype":{"classid":"0024","classname":"Film","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"opendoar____::3989","value":"Depósito Digital e-UCJC","dataInfo":null},"url":["http://hdl.handle.net/20.500.12020/795"],"distributionlocation":"","collectedfrom":{"key":"opendoar____::3989","value":"Depósito Digital 
e-UCJC","dataInfo":null},"dateofacceptance":{"value":"2018-10-11","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null}],"storagedate":null,"device":null,"size":null,"version":null,"lastmetadataupdate":null,"metadataversionnumber":null,"geolocation":[]} diff --git a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject_datasource/dataset_10.json.gz b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject_datasource/dataset_10.json.gz deleted file mode 100644 index fdc76a04c8b66fa7d14e03e25f67e7715b7e5843..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6968 zcmV-88^`1yiwFoNN1$E+17u-zVRL14UokK)YIARH0PS7fj^j3#zMrQc3=A*>xLlHK z`R}e$UCCC^oiw_82FOki0!pN9RwU7(q)IB81~$*IPtccr(HDch%dKzo6#FE54oO+E z>%XTeQWEYqGG&`Thvz%|Js%H$>@ix<(`+Go^B;R6I~Tc#65g9HXp-|je6!$6akl+} z@q`zgMHl)Pp2crL?`tK`c}8{09{f;$rg0%;h84#N&2s^@^zw^b zq-&{)d8}lfe@ByV=8CUnE()nGAiGYQ(UjjMaUQRD%5}jkpCadjVr13*sOM z{Fxtl(JW%4#c&u;`D`2ps4c0)Qe-qag#!N3yJz-)f>vab@ECwuD49YLKlZ-i7a)sV z^NdNVu(bIc%Y(|!X;NZipYZkO1L}%Af!Yh25PtBto(6U%brTf|f`kqDiGnyn+F?n!IQyQ;ipL`AlaH!KJs6OVU zx+V+!2#9_IgO=^h*tMa)#X=;)Vyt_6f2ZbGsyq`(BFU#*Wt+Dd<3C{K`Q{yYLDS8L ztSLjzfB6Z&mcRUz?Fh3|3WZ;xn5{s;y#&%NB}WNZH>lw#qqP9$LtSe#>9)4vEwzFD zDxQ}~fdkFnAO_gxyx1Sew%_h|T2l^=STd1Iam}E1IhCGnvSmq^djjx0IRwMn6X4~V zW+Go*AwX5i)fLyjzQHM^%nBHKbO+AM=nO`Roe}@|%fFvKIq+xKkMVnA?aH$+fg8c^ zK7#ja6fVSyGLfg!!O$iQ=R$$*8D$QJR$Xcq;yN@^K`aOMrkDZl_*{LOosljkzoye~vda|cYD zz>J5!H}Hq#m0LcwdPH4_0#(yEa5_nd$e4h^3(){zNG#z$JTDY2_)^@k-$<#*h$(Tm zQ9D2EHvrK(76t!$H$hYSy(A9ekLhx!O;ibtg)}1|2jmhtXT;)r!WB 
zX;v`p(7|J5)^zwZdiF~WkOQ(mP}`*w@>O-NN#MVQ5>a`Bu$&$P(BR+VY{VfycT;+HmM~-PK5@}r&$2`Gd!l8IiS0Rm|O-1Ftt8lB_UY_rC zdiQ~^w=N?X=~g(38K)CZBdV5?XUy~!^KC9m72BSJ{;q<5U)WFD3v2$4Wv07FVV#5( zHqST{^eK(V0lDPFcL3cwbQ$Fvm!E+nhNMb+;85)m`hKBP4JWAj-V7kRN#vRtlmp2C>Dr+Mo8Lyu_X~G3PyerL-M3*!-GN8G{9-tL%@tT6lZ9s_g`J_3z|B` zx1o3#k_1B+JbnYi4st1y7ksO49H>N=+j$#6Nkk0mr=2<6@=;>NLF{vmErU8W9mS^3 z52K5NEI!EC;b`C;x|!r2Q~k~q(Ukei=O=~~n1(~2E+RJNQOL%lxc4y(#eT0;334Z`d9PS9Ska6nd;sDeUZQj1 zLf7syunmcfE0$aD5}WdY$&_x1{TNGZC9yZaa2>B;Y0YVhbf#*Rw*PBfDs28m^}EtT zC}%YTEC(TB?blqMGkz}Oda*8k*#51zo)v%0Foi27pkWMGzf|Aa3|W~*^`>BnXTy~l z{^Or-{`8~6|Dnj);pkd&cF3mjB=kq)U@?oLDGMTRJPXHc5yfFViF&t?vv14(E0VMC z!SkKW?#c5FhO@`y`5JlJ=J^J}q<4wuJ2~+E13!G_kLST`9)^$0^L>G5ThBM|6tZle z!KB$>w^ko(eyd5%fB8u@*tECNdmX>^YJO`hmCWe5fRPfU8~}Bi`v#axAkY?tRG@B|n!r(R~CRQwjXf2^51L4Qk^2 zrrzV+nO%hZ?(WQcn?j$&Pi0Fj#=}P&=xPD_9X%(kMI&$l;GwnbHiA z0io-bSIMA(q5#!E39yLdM1rdFfjgmjK*w@;;EfLg&*`)qj^&4OiaIw%Hx0+RDXwr+ z?4HN@Gz|S|Fk_*&@JF5(d5lJZzhG=Op2gni(cKhy8r5Ytg&VM)o5Hy%9?(rO^JbH4 z-4u)IY&0IlBN~rl_-etuc;b(S!^oe6RL7^d(M@q%_HMc<#`kqogrmpwQj9y{rI>nG zMk7Hl#R#L3488g2k)x5^xLfU$W7rhNg6fSVY^SgXoa?lo!oCfrW4)bBNPVDXi{0jX zx|2!#@{`W>)c~?Iv(=7}s|CVfB$fc|TqFO9Yhi9GV&VEtR~%S%r;!=g zr||b%xRK-IqfejI_f4F4>o;T%clU060&7Kp{)J4rlKpzDWm0K_kTGmwIGW5o&F2oD zufBEsL{Pr8<>n;Wyw5njV&tzpK`V;xrkpATTTJU@E>M_t(53iT61NkN;hwzdo4f%F=Ry75mv{m0kNuZ z@H(@)1{ouENf^Bm)Zy{A-=4#f*o|_{=_{M}q_pkD)std_XVR z7!M@^#|`g@m#oI-p^royyuaq82XX4pX3;EUJ`2a=Y3S3%gbm{`yu}fDYxWL0BE!Kr9NyCrIT=5m zCvq!K`<_VuMo*+an-8bX6X`sW5BLbdeF1ex%`LiUkn8PSGOYvgUxF#nnnR*>B;2^v z84zGM>q76g3&aVox|K|XyIb0cF2SwuDu=$Imst)J$E0xZ+{&bQy6c@aTa#5|(S>f( zSf*=U2urqTbh18ytmN}j`>YdkEb|+Hb!3Vzn#A476gx2Qk||*83gs_&4u2X4|7X<& z;d?se@!Sin3TzDT>5`l83L|A=MeU_qUEj?_38;TvZ|v}UXen!#vR-HT>dPdWtZraF0@NH5lmj4|ZS%bJRTxyy`_lx7_AX|6@im!-mM)6eh| z7#yC~X?f?Qaqd)|Y8|2z;m{i%ZdEfF+!j>YxpW;1;y0wzUx@|rpNv=zO-Fig*gDcf zS4a=OBR%}iI+i0n{DJYmmGm&0UVHmG;KRjm6!V!E2Ht}5Vd&8b_x;h7O<1h&<=sSj 
zxHWtCNDsr&VCDz+B0Y=-fj@mL(nBpz`=kdO^5RO;!*D(t&I7+y(!<|j1cELfI1lt{$njSs}MVnB3!ZW;YE8GC(C=3psJ5U%-cb&oq#Ln+T%b=q) zUyzLBD|zSIE1QrqudbfQc12cSv%h(j>_`kW^E7@ptECdfI4AlSTZ#$d?q7U`V) z1&^=(N^%*gUDLYI;m5hP%DB`e=2ft8S{dCy`}gy!9sJY7gdRjue2{DZ{_9M$F`jJM z7)+Ss5N+O1X(ScZCqdZGYez8K2W~l&$#kc&p)BIf2lI~o<}Jwup6}@Z50^Dr>knh9 z?{FcIWwE#V(~JvDn3t&*t5TkcAK&`|553f-iV&_Wr7FgTWrZ@)eYkM8J zLf0sxGp+-(qsRr!yr~XC(TK-x!cti5bSfP-esAd5qkABOMP#v9(@3M}sQAMi%wgjZ$r?XNo8VLVh zvr!Bcg@VM%J{LvvQz$75f#zpZg18EMTLDc5#Wb= zP*hb8pr_>p0Ru?Q$U7&6GK`s+LgS$y7{G2ejIESSjnm9>Z?tvUr7If51=Ja@pr~_> z@k+oxY<|#vW>G*H8~Q@A*Cl5Le`U0>oeEH}0X|A#Ohg}?%7%UD!6CQZ0+EHTZn$)P zYg4xH6-BUg%M?lZnN)q7%luB!#6b6UqN2ub>|jxFl?o^vD`>X&bx*7RIT{9vw%BLEy8#2C|U9kcZ40S)6fvCr&85z9mmw^joWIrd_79!idu4Q?5HhAFJzas4m_B`!-%`cAaOctr7Klhfm)lUrXlao9XQHe=r(4g4Y5E~*?H-C&IA z-5s$zx`U7FK#mC3MXc{m1Z&zre;d7q-iPURK~C~D$ECSuA3)A}`&6AIT@bs5e`(k7 z#^J3wOUQZP&8c)ikd1JX87_x}6u2X{hZ9>_Gfna_EF7MYTr4wz5A+yUAXoygcMQmU zgybt4;WIm$nP%yanyq7E+8`05%u9+Pa5Nv{=1HXrLz8IEKumefRm03r@2ov!0cb(D zUjJj6@Fi;Nxf16TA0$HGCgXT|C(JuW|Mx%td%StC;?mF=jzRfY%?T@G&6^>A(X##( z6B4PGG^^M{NcqT+b;ZQCLapef!UVR#%ruySJ~!`DeAw4s<~`msKmXlr{yi!ICR^rMFx z=eB!5HqaDZp~2AU)Ukp7kZ3wK(C-&p#|G*I8wj1gU1kHhDbTTj92@B2*gz96oCepi zfhJ+FU|tkN5siIL{WuOij^B;OJQ|O&7v97Mx;1+z*+AjgzZV-Q98CQ2BeH?4JngfA zroEfkK;dK_j9p+VM=-i0!RR@@Np9{ZIiE|rQC?!Xl8!~xcIYoWymYUkv<*ZRd?3Gk z@6rL&fniGIsf+5;DiANY^=+LCg?=uFMyumhy^KPK7%O9kNq1N*m*O6sqMPl!^8Ugm z4#RBl%*2^#bgDW;4XalM+964ATdq0QOBZdUOW}{uabEB^tHuqgi7H@-V0XwT^%^7j#XpgtU%*HxU+JpX*0B*@+<1Zem}vu zFVDojo-jxeM%Xcq@JdTc^AL*7yI3fC8dU(g{? z){x%v8br1dktMIxFHKP$&L)=`Z3HOFuG21(2{Kcx4aS3RM~N-S%W9O@4WJ;>_oAW? 
ztSA;_;|Vo{$G^@^Oqsn9PPRfDW2Is>?P{}6K;s-4DVlq5doU_%o=8#<7+)zv1A%%^ zGbpqu3wt!PgV-lRrki)n;F6I4lKEPn>-hmD{t_lN91bt`Yk!Wfe>DJoMwt?ZmD)!IiSwevM_K`NV`8nJ6rO#-j1c?2j zj0DJf#uu6_0!syGIxSQc66R_xW6U6JxlH#Mz%iGaJkmD?tu0CSRuCuFgb_@ZmuI}t zErSmj2S}4((DX9R$} zf4%PM2u+u}AZnUnkn>U+fi>L;1w%&PijtJac)*obd%U{C)(fb9-FcC}XWE7wAFSyI zLqdy%vhB0|=PPU~Ko>zLTy5Ulm+_zvOU+1HP-u14qJ$LqexqeQX)@MAniylYQf!gd z;p^T`#hStcmRt%w9AGr}*F8e9yz|XFL-0~^si>u=Y0^|iw}HZ%w6ZM;5$rwZ%EpH8 zgQzJUV`3mXsQ&X3hB_VNN+K-{N3+dafGfW=hV~H*8bPPZOqYO$r7-L1VaOn@I~9C^ z@!u|VS^c$Qcc0I$*BPEALn^?GR z(HSUEUDXv&heW$9JJ_r^FuDQ8b$5y#D(wzdc%B9SIIdqE7G0S*&}}Lt+q};UnAIIF zZQ}AN{f3i_f9ICCt-=t_d~e_dRrDSmo2D6&MDsyhAi~a&AwQRD$;oFFsaQ;26V&T^ zupFR@4Kl04u1jE*)f&k+V|qGU+EkVt2v*G2%_Gw)_RFT--#<=Yqv)rw}M zJ}EXYHKr6FL9@Cr1fzT#7bulgz%aX$fiH|iA0MNq(;>kMu41hY7ZS{c1al$5Tu87# z*aYk1q5PJJeU4}Ldo~=$GwTG;Y}{#{nH!NE&&=`69*}1ic%FY9&x}oG{t*5gPa+8Ay?zxi0-ltI+4n} zFmWX?;)IQfz^@1hND>qP-EtV7A(Gk=(~N$@D`&}_T&q;O#&YeqfkkE-sjagG6S#l~ z59>Fij0oZ{qwWe*nJ;9*=d~Z{kK|=QnYFlLz#h1lPXo!Db#CQhzZXQ5K9x zQOG0CrgRua!PFa$ZhP6|*6f}1o6P+C-Ory4CZowC`c14nZTn3ovn%~3ctL;s$`9wG z;oSF}-^BS%+y(vL;4INRjs(NUV~dkgL0@$zlI||D1H;DLmWk$=yFSq_4Em0%d*Y*Q z9F9c^Ya@#H6S$r$q`O4f@???9SYdc~2SlqtoWK^z>_C{gvgqJww_j)wa8@0W7c@m!o|Ymff2S(myj3wz@M6C9SU0wcgyQu@efP1bpN6^P2F9@`x@Zn} zXHOiWK$ zM2U!=Ni3v*JsVyqf8F~SgT=R2>O1x;eEoN}aQhPvN;rl6@cle@D{frKirae`x0!s_&lb19t;g@HS zaPvMp_=Kv6E9vMW_v#LJxzy$RPN$A8((rmXx`?BT{9frI=oRcXUBr#ZjxO@%PyY{f KBST|kQ~?0ZJb}sp From 8d97949316a93884a123746cec0d60f6c34b2a2e Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 7 Oct 2022 09:52:45 +0200 Subject: [PATCH 03/55] [cleaning] fixed loop in wf nodes --- .../eu/dnetlib/dhp/oa/graph/clean/oozie_app/workflow.xml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/clean/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/clean/oozie_app/workflow.xml index 2ba0a7ad7..08e74a5e5 100644 --- 
a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/clean/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/clean/oozie_app/workflow.xml @@ -492,7 +492,7 @@ --datasourcePath${workingDir}/working/hostedby --collectedfrom${collectedfrom} - + @@ -521,7 +521,7 @@ --datasourcePath${workingDir}/working/hostedby --collectedfrom${collectedfrom} - + @@ -550,7 +550,7 @@ --datasourcePath${workingDir}/working/hostedby --collectedfrom${collectedfrom} - + From ece40adc09b5124386fb4f28391f7b151ea0ceea Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Tue, 11 Oct 2022 10:10:20 +0200 Subject: [PATCH 04/55] [cleaning] fixing NPE in the country cleaning phase --- .../dhp/oa/graph/clean/country/GetDatasourceFromCountry.java | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/country/GetDatasourceFromCountry.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/country/GetDatasourceFromCountry.java index dd5af6998..d3741d3e8 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/country/GetDatasourceFromCountry.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/country/GetDatasourceFromCountry.java @@ -65,7 +65,6 @@ public class GetDatasourceFromCountry implements Serializable { conf, isSparkSessionManaged, spark -> { - getDatasourceFromCountry(spark, country, inputPath, workingPath); }); } @@ -83,7 +82,6 @@ public class GetDatasourceFromCountry implements Serializable { (FilterFunction) o -> !o.getDataInfo().getDeletedbyinference() && o.getCountry().getClassid().length() > 0 && o.getCountry().getClassid().equals(country)); - ; // filtering of the relations taking the non deleted by inference and those with IsProvidedBy as relclass Dataset relation = spark @@ -97,7 +95,7 @@ public class GetDatasourceFromCountry implements 
Serializable { !rel.getDataInfo().getDeletedbyinference()); organization - .joinWith(relation, organization.col("id").equalTo(relation.col("target")), "left") + .joinWith(relation, organization.col("id").equalTo(relation.col("target"))) .map((MapFunction, String>) t2 -> t2._2().getSource(), Encoders.STRING()) .write() .mode(SaveMode.Overwrite) From b301e9fdfffa4da0240981f97e0bd3bb5dbbfcdc Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Tue, 11 Oct 2022 11:08:52 +0200 Subject: [PATCH 05/55] [cleaning] renamed action name/description --- .../eu/dnetlib/dhp/oa/graph/clean/oozie_app/workflow.xml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/clean/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/clean/oozie_app/workflow.xml index 08e74a5e5..19e1b2a02 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/clean/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/clean/oozie_app/workflow.xml @@ -432,14 +432,14 @@ - + - + yarn cluster - Clean publications context + Select datasource ID from country eu.dnetlib.dhp.oa.graph.clean.country.GetDatasourceFromCountry dhp-graph-mapper-${projectVersion}.jar From 6163ecbf63858f5a3b34cffa19b63fa59f851d9a Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Tue, 11 Oct 2022 11:20:03 +0200 Subject: [PATCH 06/55] [cleaning] renamed parameters in wf action --- .../dhp/oa/graph/clean/oozie_app/workflow.xml | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/clean/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/clean/oozie_app/workflow.xml index 19e1b2a02..6435d5131 100644 --- 
a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/clean/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/clean/oozie_app/workflow.xml @@ -471,7 +471,7 @@ yarn cluster - Clean publications counmtry + Clean publication country eu.dnetlib.dhp.oa.graph.clean.country.CleanCountrySparkJob dhp-graph-mapper-${projectVersion}.jar @@ -489,7 +489,7 @@ --workingPath${workingDir}/working/publication --country${country} --verifyParam${verifyCountryParam} - --datasourcePath${workingDir}/working/hostedby + --hostedBy${workingDir}/working/hostedby --collectedfrom${collectedfrom} @@ -500,7 +500,7 @@ yarn cluster - Clean datasets Country + Clean dataset country eu.dnetlib.dhp.oa.graph.clean.country.CleanCountrySparkJob dhp-graph-mapper-${projectVersion}.jar @@ -518,7 +518,7 @@ --workingPath${workingDir}/working/dataset --country${country} --verifyParam${verifyCountryParam} - --datasourcePath${workingDir}/working/hostedby + --hostedBy${workingDir}/working/hostedby --collectedfrom${collectedfrom} @@ -529,7 +529,7 @@ yarn cluster - Clean otherresearchproducts country + Clean otherresearchproduct country eu.dnetlib.dhp.oa.graph.clean.country.CleanCountrySparkJob dhp-graph-mapper-${projectVersion}.jar @@ -547,7 +547,7 @@ --workingPath${workingDir}/working/otherresearchproduct --country${country} --verifyParam${verifyCountryParam} - --datasourcePath${workingDir}/working/hostedby + --hostedBy${workingDir}/working/hostedby --collectedfrom${collectedfrom} @@ -558,7 +558,7 @@ yarn cluster - Clean softwares country + Clean software country eu.dnetlib.dhp.oa.graph.clean.country.CleanCountrySparkJob dhp-graph-mapper-${projectVersion}.jar @@ -576,7 +576,7 @@ --workingPath${workingDir}/working/software --country${country} --verifyParam${verifyCountryParam} - --datasourcePath${workingDir}/working/hostedby + --hostedBy${workingDir}/working/hostedby --collectedfrom${collectedfrom} From 
b47aaf4dd17b9446bd423637391fbe83aab80775 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 13 Oct 2022 11:23:43 +0200 Subject: [PATCH 07/55] [cleaning] subjects declared as belonging to specific vocabularies whose values are not found in the vocab are set to type keyword --- .../oaf/utils/GraphCleaningFunctions.java | 2 +- .../dhp/oa/graph/clean/CleaningRuleMap.java | 37 ++++++++++--------- .../clean/country/CleanCountrySparkJob.java | 2 +- .../clean/GraphCleaningFunctionsTest.java | 10 +++++ .../eu/dnetlib/dhp/oa/graph/clean/result.json | 22 +++++++++++ 5 files changed, 54 insertions(+), 19 deletions(-) diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java index 775f228eb..363f95423 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java @@ -211,7 +211,7 @@ public class GraphCleaningFunctions extends CleaningFunctions { .orElse(s.getValue()), Function.identity(), (s1, s2) -> Collections - .min(Lists.newArrayList(s1, s1), new SubjectProvenanceComparator()))) + .min(Lists.newArrayList(s1, s2), new SubjectProvenanceComparator()))) .values()); r.setSubject(subjects); } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningRuleMap.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningRuleMap.java index 147e26699..5f3b4e1ca 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningRuleMap.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningRuleMap.java @@ -3,6 +3,7 @@ package eu.dnetlib.dhp.oa.graph.clean; import java.io.Serializable; import java.util.HashMap; +import java.util.Objects; import java.util.concurrent.atomic.AtomicReference; import 
org.apache.commons.lang3.SerializationUtils; @@ -10,6 +11,7 @@ import org.apache.commons.lang3.StringUtils; import eu.dnetlib.dhp.common.FunctionalInterfaceSupport.SerializableConsumer; import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; +import eu.dnetlib.dhp.common.vocabulary.VocabularyTerm; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.*; @@ -31,29 +33,30 @@ public class CleaningRuleMap extends HashMap, SerializableConsumer modified = new AtomicReference<>(false); + vocabularies.find(vocabularyId).ifPresent(vocabulary -> { - if (!ModelConstants.DNET_SUBJECT_KEYWORD.equalsIgnoreCase(subject.getQualifier().getClassid())) { - return; - } - Qualifier newValue = vocabulary.lookup(subject.getValue()); - if (!ModelConstants.UNKNOWN.equals(newValue.getClassid())) { - subject.setValue(newValue.getClassid()); - subject.getQualifier().setClassid(vocabularyId); - subject.getQualifier().setClassname(vocabulary.getName()); - modified.set(true); + if (ModelConstants.DNET_SUBJECT_KEYWORD.equalsIgnoreCase(subject.getQualifier().getClassid())) { + Qualifier newValue = vocabulary.lookup(subject.getValue()); + if (!ModelConstants.UNKNOWN.equals(newValue.getClassid())) { + subject.setValue(newValue.getClassid()); + subject.getQualifier().setClassid(vocabularyId); + subject.getQualifier().setClassname(vocabulary.getName()); + } + } else if (vocabularyId.equals(subject.getQualifier().getClassid())) { + Qualifier syn = vocabulary.getSynonymAsQualifier(subject.getValue()); + VocabularyTerm term = vocabulary.getTerm(subject.getValue()); + if (Objects.isNull(syn) && Objects.isNull(term)) { + subject.getQualifier().setClassid(ModelConstants.DNET_SUBJECT_KEYWORD); + subject.getQualifier().setClassname(ModelConstants.DNET_SUBJECT_KEYWORD); + } } }); - return modified.get(); } private static void cleanRelation(VocabularyGroup vocabularies, Relation r) { diff --git 
a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/country/CleanCountrySparkJob.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/country/CleanCountrySparkJob.java index cd77f342e..45590f789 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/country/CleanCountrySparkJob.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/country/CleanCountrySparkJob.java @@ -43,7 +43,7 @@ public class CleanCountrySparkJob implements Serializable { String jsonConfiguration = IOUtils .toString( - CleanContextSparkJob.class + CleanCountrySparkJob.class .getResourceAsStream( "/eu/dnetlib/dhp/oa/graph/input_clean_country_parameters.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java index 6c43da832..4035307e5 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java @@ -278,6 +278,16 @@ public class GraphCleaningFunctionsTest { s -> "0102 computer and information sciences".equals(s.getValue()) & ModelConstants.DNET_SUBJECT_FOS_CLASSID.equals(s.getQualifier().getClassid()))); + List s1 = p_cleaned + .getSubject() + .stream() + .filter(s -> s.getValue().equals("In Situ Hybridization")) + .collect(Collectors.toList()); + assertNotNull(s1); + assertEquals(1, s1.size()); + assertEquals(ModelConstants.DNET_SUBJECT_KEYWORD, s1.get(0).getQualifier().getClassid()); + assertEquals(ModelConstants.DNET_SUBJECT_KEYWORD, s1.get(0).getQualifier().getClassname()); + // TODO add more assertions to verity the cleaned values 
System.out.println(MAPPER.writeValueAsString(p_cleaned)); } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/result.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/result.json index 8e4fc4545..84ff35c08 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/result.json +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/result.json @@ -706,6 +706,28 @@ "source": [ ], "subject": [ + { + "dataInfo": { + "provenanceaction": { + "classid": "sysimport:crosswalk:repository", + "classname": "sysimport:crosswalk:repository", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "deletedbyinference": false, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "0.9" + }, + "qualifier": { + "classid": "FOS", + "classname": "Fields of Science and Technology classification", + "schemeid": "dnet:result_subject", + "schemename": "dnet:result_subject" + }, + "value": "In Situ Hybridization" + }, { "dataInfo": { "deletedbyinference": false, From a1f94530a3a170b8095de027660df0c2d4a35b09 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Thu, 13 Oct 2022 11:47:11 +0200 Subject: [PATCH 08/55] added documentation --- .../DataciteToOAFTransformation.scala | 122 +++++++++++------- 1 file changed, 74 insertions(+), 48 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala index c29614d33..29f5cb99c 100644 --- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala @@ -24,11 +24,11 @@ import scala.io.Source object DataciteToOAFTransformation 
{ case class HostedByMapType( - openaire_id: String, - datacite_name: String, - official_name: String, - similarity: Option[Float] - ) {} + openaire_id: String, + datacite_name: String, + official_name: String, + similarity: Option[Float] + ) {} val mapper = new ObjectMapper() @@ -47,12 +47,12 @@ object DataciteToOAFTransformation { } /** This method should skip record if json contains invalid text - * defined in file datacite_filter - * - * @param record : unparsed datacite record - * @param json : parsed record - * @return True if the record should be skipped - */ + * defined in file datacite_filter + * + * @param record : not parsed Datacite record + * @param json : parsed record + * @return True if the record should be skipped + */ def skip_record(record: String, json: org.json4s.JValue): Boolean = { implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats datacite_filter.exists(f => record.contains(f)) || (json \\ "publisher") @@ -98,6 +98,11 @@ object DataciteToOAFTransformation { } + /** + * This utility method indicates whether the embargo date has been reached + * @param embargo_end_date + * @return True if the embargo date has been reached, false otherwise + */ def embargo_end(embargo_end_date: String): Boolean = { val dt = LocalDate.parse(embargo_end_date, DateTimeFormatter.ofPattern("[yyyy-MM-dd]")) val td = LocalDate.now() @@ -142,12 +147,27 @@ object DataciteToOAFTransformation { } } + /*** + * Use the vocabulary dnet:publication_resource to find a synonym to one of these terms and get the instance.type. 
+ * Using the dnet:result_typologies vocabulary, we look up the instance.type synonym + * to generate one of the following main entities: + * - publication + * - dataset + * - software + * - otherresearchproduct + + * @param resourceType + * @param resourceTypeGeneral + * @param schemaOrg + * @param vocabularies + * @return + */ def getTypeQualifier( - resourceType: String, - resourceTypeGeneral: String, - schemaOrg: String, - vocabularies: VocabularyGroup - ): (Qualifier, Qualifier) = { + resourceType: String, + resourceTypeGeneral: String, + schemaOrg: String, + vocabularies: VocabularyGroup + ): (Qualifier, Qualifier) = { if (resourceType != null && resourceType.nonEmpty) { val typeQualifier = vocabularies.getSynonymAsQualifier(ModelConstants.DNET_PUBLICATION_RESOURCE, resourceType) @@ -192,11 +212,11 @@ object DataciteToOAFTransformation { } def getResult( - resourceType: String, - resourceTypeGeneral: String, - schemaOrg: String, - vocabularies: VocabularyGroup - ): Result = { + resourceType: String, + resourceTypeGeneral: String, + schemaOrg: String, + vocabularies: VocabularyGroup + ): Result = { val typeQualifiers: (Qualifier, Qualifier) = getTypeQualifier(resourceType, resourceTypeGeneral, schemaOrg, vocabularies) if (typeQualifiers == null) @@ -238,11 +258,11 @@ object DataciteToOAFTransformation { } /** As describe in ticket #6377 - * when the result come from figshare we need to remove subject - * and set Access rights OPEN. - * - * @param r - */ + * when the result come from figshare we need to remove subject + * and set Access rights OPEN. 
+ * + * @param r + */ def fix_figshare(r: Result): Unit = { if (r.getInstance() != null) { @@ -269,12 +289,12 @@ object DataciteToOAFTransformation { } def generateRelation( - sourceId: String, - targetId: String, - relClass: String, - cf: KeyValue, - di: DataInfo - ): Relation = { + sourceId: String, + targetId: String, + relClass: String, + cf: KeyValue, + di: DataInfo + ): Relation = { val r = new Relation r.setSource(sourceId) @@ -303,12 +323,12 @@ object DataciteToOAFTransformation { } def generateOAF( - input: String, - ts: Long, - dateOfCollection: Long, - vocabularies: VocabularyGroup, - exportLinks: Boolean - ): List[Oaf] = { + input: String, + ts: Long, + dateOfCollection: Long, + vocabularies: VocabularyGroup, + exportLinks: Boolean + ): List[Oaf] = { implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats lazy val json = parse(input) @@ -330,6 +350,7 @@ object DataciteToOAFTransformation { if (result == null) return List() + // DOI is mapped on a PID inside a Instance object val doi_q = OafMapperUtils.qualifier( "doi", "doi", @@ -338,6 +359,8 @@ object DataciteToOAFTransformation { ) val pid = OafMapperUtils.structuredProperty(doi, doi_q, dataInfo) result.setPid(List(pid).asJava) + + // This identifiere will be replaced in a second moment using the PID logic generation result.setId(OafMapperUtils.createOpenaireId(50, s"datacite____::$doi", true)) result.setOriginalId(List(doi).asJava) @@ -386,6 +409,11 @@ object DataciteToOAFTransformation { a } + if (authors == null || authors.isEmpty || !authors.exists(a => a != null)) + return List() + result.setAuthor(authors.asJava) + + val titles: List[TitleType] = (json \\ "titles").extractOrElse[List[TitleType]](List()) result.setTitle( @@ -409,9 +437,7 @@ object DataciteToOAFTransformation { .asJava ) - if (authors == null || authors.isEmpty || !authors.exists(a => a != null)) - return List() - result.setAuthor(authors.asJava) + val dates = (json \\ "dates").extract[List[DateType]] val 
publication_year = (json \\ "publicationYear").extractOrElse[String](null) @@ -619,16 +645,16 @@ object DataciteToOAFTransformation { } private def generateRelations( - rels: List[RelatedIdentifierType], - id: String, - date: String - ): List[Relation] = { + rels: List[RelatedIdentifierType], + id: String, + date: String + ): List[Relation] = { rels .filter(r => subRelTypeMapping .contains(r.relationType) && (r.relatedIdentifierType.equalsIgnoreCase("doi") || - r.relatedIdentifierType.equalsIgnoreCase("pmid") || - r.relatedIdentifierType.equalsIgnoreCase("arxiv")) + r.relatedIdentifierType.equalsIgnoreCase("pmid") || + r.relatedIdentifierType.equalsIgnoreCase("arxiv")) ) .map(r => { val rel = new Relation @@ -660,4 +686,4 @@ object DataciteToOAFTransformation { s"10|$b::${DHPUtils.md5(a)}" } -} +} \ No newline at end of file From ae7cd0735a8528cd419ce2863077d436bd34cd0d Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 14 Oct 2022 15:47:58 +0200 Subject: [PATCH 09/55] [graph2hive] more partitions --- .../eu/dnetlib/dhp/oa/graph/hive/oozie_app/workflow.xml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hive/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hive/oozie_app/workflow.xml index ba5f4f375..4468382be 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hive/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hive/oozie_app/workflow.xml @@ -126,6 +126,7 @@ --hiveDbName${hiveDbName} --classNameeu.dnetlib.dhp.schema.oaf.Publication --hiveMetastoreUris${hiveMetastoreUris} + --numPartitions8000 @@ -152,6 +153,7 @@ --hiveDbName${hiveDbName} --classNameeu.dnetlib.dhp.schema.oaf.Dataset --hiveMetastoreUris${hiveMetastoreUris} + --numPartitions4000 @@ -178,6 +180,7 @@ --hiveDbName${hiveDbName} 
--classNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct --hiveMetastoreUris${hiveMetastoreUris} + --numPartitions3000 @@ -204,6 +207,7 @@ --hiveDbName${hiveDbName} --classNameeu.dnetlib.dhp.schema.oaf.Software --hiveMetastoreUris${hiveMetastoreUris} + --numPartitions300 @@ -230,6 +234,7 @@ --hiveDbName${hiveDbName} --classNameeu.dnetlib.dhp.schema.oaf.Datasource --hiveMetastoreUris${hiveMetastoreUris} + --numPartitions100 @@ -256,6 +261,7 @@ --hiveDbName${hiveDbName} --classNameeu.dnetlib.dhp.schema.oaf.Organization --hiveMetastoreUris${hiveMetastoreUris} + --numPartitions400 @@ -309,6 +315,7 @@ --hiveDbName${hiveDbName} --classNameeu.dnetlib.dhp.schema.oaf.Relation --hiveMetastoreUris${hiveMetastoreUris} + --numPartitions10000 From 72f0d88d6ce9536a2577b24b8c5d7242b5dc6e39 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Wed, 19 Oct 2022 14:18:42 +0200 Subject: [PATCH 10/55] formatted code --- .../DataciteToOAFTransformation.scala | 132 +++++++++--------- 1 file changed, 64 insertions(+), 68 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala index 29f5cb99c..a7ad9e2d6 100644 --- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala @@ -24,11 +24,11 @@ import scala.io.Source object DataciteToOAFTransformation { case class HostedByMapType( - openaire_id: String, - datacite_name: String, - official_name: String, - similarity: Option[Float] - ) {} + openaire_id: String, + datacite_name: String, + official_name: String, + similarity: Option[Float] + ) {} val mapper = new ObjectMapper() @@ -47,12 +47,12 @@ object DataciteToOAFTransformation { } /** This method should skip record if json contains invalid text - * defined in file 
datacite_filter - * - * @param record : not parsed Datacite record - * @param json : parsed record - * @return True if the record should be skipped - */ + * defined in file datacite_filter + * + * @param record : not parsed Datacite record + * @param json : parsed record + * @return True if the record should be skipped + */ def skip_record(record: String, json: org.json4s.JValue): Boolean = { implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats datacite_filter.exists(f => record.contains(f)) || (json \\ "publisher") @@ -98,11 +98,10 @@ object DataciteToOAFTransformation { } - /** - * This utility method indicates whether the embargo date has been reached - * @param embargo_end_date - * @return True if the embargo date has been reached, false otherwise - */ + /** This utility method indicates whether the embargo date has been reached + * @param embargo_end_date + * @return True if the embargo date has been reached, false otherwise + */ def embargo_end(embargo_end_date: String): Boolean = { val dt = LocalDate.parse(embargo_end_date, DateTimeFormatter.ofPattern("[yyyy-MM-dd]")) val td = LocalDate.now() @@ -147,27 +146,27 @@ object DataciteToOAFTransformation { } } - /*** - * Use the vocabulary dnet:publication_resource to find a synonym to one of these terms and get the instance.type. - * Using the dnet:result_typologies vocabulary, we look up the instance.type synonym - * to generate one of the following main entities: - * - publication - * - dataset - * - software - * - otherresearchproduct - - * @param resourceType - * @param resourceTypeGeneral - * @param schemaOrg - * @param vocabularies - * @return - */ + /** * + * Use the vocabulary dnet:publication_resource to find a synonym to one of these terms and get the instance.type. 
+ * Using the dnet:result_typologies vocabulary, we look up the instance.type synonym + * to generate one of the following main entities: + * - publication + * - dataset + * - software + * - otherresearchproduct + * + * @param resourceType + * @param resourceTypeGeneral + * @param schemaOrg + * @param vocabularies + * @return + */ def getTypeQualifier( - resourceType: String, - resourceTypeGeneral: String, - schemaOrg: String, - vocabularies: VocabularyGroup - ): (Qualifier, Qualifier) = { + resourceType: String, + resourceTypeGeneral: String, + schemaOrg: String, + vocabularies: VocabularyGroup + ): (Qualifier, Qualifier) = { if (resourceType != null && resourceType.nonEmpty) { val typeQualifier = vocabularies.getSynonymAsQualifier(ModelConstants.DNET_PUBLICATION_RESOURCE, resourceType) @@ -212,11 +211,11 @@ object DataciteToOAFTransformation { } def getResult( - resourceType: String, - resourceTypeGeneral: String, - schemaOrg: String, - vocabularies: VocabularyGroup - ): Result = { + resourceType: String, + resourceTypeGeneral: String, + schemaOrg: String, + vocabularies: VocabularyGroup + ): Result = { val typeQualifiers: (Qualifier, Qualifier) = getTypeQualifier(resourceType, resourceTypeGeneral, schemaOrg, vocabularies) if (typeQualifiers == null) @@ -258,11 +257,11 @@ object DataciteToOAFTransformation { } /** As describe in ticket #6377 - * when the result come from figshare we need to remove subject - * and set Access rights OPEN. - * - * @param r - */ + * when the result come from figshare we need to remove subject + * and set Access rights OPEN. 
+ * + * @param r + */ def fix_figshare(r: Result): Unit = { if (r.getInstance() != null) { @@ -289,12 +288,12 @@ object DataciteToOAFTransformation { } def generateRelation( - sourceId: String, - targetId: String, - relClass: String, - cf: KeyValue, - di: DataInfo - ): Relation = { + sourceId: String, + targetId: String, + relClass: String, + cf: KeyValue, + di: DataInfo + ): Relation = { val r = new Relation r.setSource(sourceId) @@ -323,12 +322,12 @@ object DataciteToOAFTransformation { } def generateOAF( - input: String, - ts: Long, - dateOfCollection: Long, - vocabularies: VocabularyGroup, - exportLinks: Boolean - ): List[Oaf] = { + input: String, + ts: Long, + dateOfCollection: Long, + vocabularies: VocabularyGroup, + exportLinks: Boolean + ): List[Oaf] = { implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats lazy val json = parse(input) @@ -413,7 +412,6 @@ object DataciteToOAFTransformation { return List() result.setAuthor(authors.asJava) - val titles: List[TitleType] = (json \\ "titles").extractOrElse[List[TitleType]](List()) result.setTitle( @@ -437,8 +435,6 @@ object DataciteToOAFTransformation { .asJava ) - - val dates = (json \\ "dates").extract[List[DateType]] val publication_year = (json \\ "publicationYear").extractOrElse[String](null) @@ -645,16 +641,16 @@ object DataciteToOAFTransformation { } private def generateRelations( - rels: List[RelatedIdentifierType], - id: String, - date: String - ): List[Relation] = { + rels: List[RelatedIdentifierType], + id: String, + date: String + ): List[Relation] = { rels .filter(r => subRelTypeMapping .contains(r.relationType) && (r.relatedIdentifierType.equalsIgnoreCase("doi") || - r.relatedIdentifierType.equalsIgnoreCase("pmid") || - r.relatedIdentifierType.equalsIgnoreCase("arxiv")) + r.relatedIdentifierType.equalsIgnoreCase("pmid") || + r.relatedIdentifierType.equalsIgnoreCase("arxiv")) ) .map(r => { val rel = new Relation @@ -686,4 +682,4 @@ object DataciteToOAFTransformation { 
s"10|$b::${DHPUtils.md5(a)}" } -} \ No newline at end of file +} From bca4a61710ed7d394937a28973f44018d99aa2be Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 19 Oct 2022 15:20:58 +0200 Subject: [PATCH 11/55] suppressing hyper verbose spark logs during unit test execution --- .../src/test/resources/log4j.properties | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 dhp-workflows/dhp-enrichment/src/test/resources/log4j.properties diff --git a/dhp-workflows/dhp-enrichment/src/test/resources/log4j.properties b/dhp-workflows/dhp-enrichment/src/test/resources/log4j.properties new file mode 100644 index 000000000..ce37270c6 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/test/resources/log4j.properties @@ -0,0 +1,25 @@ +# Root logger option +log4j.rootLogger=DEBUG, stdout + +# Direct log messages to stdout +log4j.appender.stdout=org.apache.log4j.ConsoleAppender +log4j.appender.stdout.Target=System.out +log4j.appender.stdout.layout=org.apache.log4j.PatternLayout +log4j.appender.stdout.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss} %-5p %c{1}:%L - %m%n + +# Change this to set Spark log level +log4j.logger.org.apache.spark=ERROR +log4j.rootCategory=WARN + +# Silence akka remoting +log4j.logger.Remoting=WARN + +# Ignore messages below warning level from Jetty, because it's a bit verbose +log4j.logger.org.eclipse.jetty=WARN + +log4j.logger.org.apache.hadoop.mapreduce.lib.output.FileOutputCommitterFactory=WARN +log4j.logger.org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter=WARN +log4j.logger.org.apache.parquet.hadoop.ParquetOutputFormat=WARN +log4j.logger.org.apache.parquet.hadoop.InternalParquetRecordWriter=WARN +log4j.logger.org.apache.hadoop.io.compress.CodecPool=WARN +log4j.logger.org.apache.parquet.hadoop.codec.CodecConfig=WARN \ No newline at end of file From 31a10f000b200d047ad50be1c0a376a7316d8be9 Mon Sep 17 00:00:00 2001 From: Alessia Bardi Date: Sun, 23 Oct 2022 18:05:37 +0200 Subject: [PATCH 12/55] Map the field 
oaf:eoscifguidelines from mdstores. Currently we can find it in ROHub metadata --- .../raw/AbstractMdRecordToOafMapper.java | 20 +++++++++++++++++++ .../dnetlib/dhp/oa/graph/raw/MappersTest.java | 1 + .../eu/dnetlib/dhp/oa/graph/raw/rohub.xml | 5 ++++- 3 files changed, 25 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java index c157be51a..a33a04be3 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java @@ -366,6 +366,7 @@ public abstract class AbstractMdRecordToOafMapper { r.setInstance(instances); r.setBestaccessright(OafMapperUtils.createBestAccessRights(instances)); + r.setEoscifguidelines(prepareEOSCIfGuidelines(doc, info)); } protected abstract List prepareResultPids(Document doc, DataInfo info); @@ -384,6 +385,25 @@ public abstract class AbstractMdRecordToOafMapper { return list; } + private List prepareEOSCIfGuidelines(Document doc, DataInfo info){ + final Set set = Sets.newHashSet(); + for (final Object o : doc.selectNodes("//oaf:eoscifguidelines")) { + final String code = ((Node) o).valueOf("@code"); + final String label = ((Node) o).valueOf("@label"); + final String url = ((Node) o).valueOf("@url"); + final String semrel = ((Node) o).valueOf("@semrel"); + if (StringUtils.isNotBlank(code)) { + final EoscIfGuidelines eig = new EoscIfGuidelines(); + eig.setCode(code); + eig.setLabel(label); + eig.setUrl(url); + eig.setSemanticRelation(semrel); + set.add(eig); + } + } + return Lists.newArrayList(set); + } + protected abstract Qualifier prepareResourceType(Document doc, DataInfo info); protected abstract List prepareInstances( diff --git 
a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java index f0eadbd0d..8c9b3caba 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java @@ -947,6 +947,7 @@ class MappersTest { Instance inst = p.getInstance().get(0); assertEquals("https://w3id.org/ro-id/0ab171a7-45c5-4194-82d4-850955504bca", inst.getPid().get(0).getValue()); assertEquals("https://w3id.org/ro-id/0ab171a7-45c5-4194-82d4-850955504bca", inst.getUrl().get(0)); + assertEquals(1, p.getEoscifguidelines().size()); } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/rohub.xml b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/rohub.xml index e1e30c3de..18f637ecc 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/rohub.xml +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/rohub.xml @@ -65,7 +65,6 @@ Ecology - EOSC::RO-crate https://w3id.org/ro-id/0ab171a7-45c5-4194-82d4-850955504bca @@ -75,5 +74,9 @@ + \ No newline at end of file From ee759ac92da0116f2f6c0c8b11aacce98e5a55a3 Mon Sep 17 00:00:00 2001 From: Alessia Bardi Date: Sun, 23 Oct 2022 18:09:47 +0200 Subject: [PATCH 13/55] file format after mvn compile --- .../dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java index a33a04be3..02b1e7e7c 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java +++ 
b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java @@ -385,7 +385,7 @@ public abstract class AbstractMdRecordToOafMapper { return list; } - private List prepareEOSCIfGuidelines(Document doc, DataInfo info){ + private List prepareEOSCIfGuidelines(Document doc, DataInfo info) { final Set set = Sets.newHashSet(); for (final Object o : doc.selectNodes("//oaf:eoscifguidelines")) { final String code = ((Node) o).valueOf("@code"); From 208ed323153a0b19189f6ddf65776f67a6df2e67 Mon Sep 17 00:00:00 2001 From: Alessia Bardi Date: Sun, 23 Oct 2022 18:18:13 +0200 Subject: [PATCH 14/55] fixed xpath for semantic relation --- .../dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java | 2 +- .../test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java index 02b1e7e7c..7aa40cb8a 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java @@ -391,7 +391,7 @@ public abstract class AbstractMdRecordToOafMapper { final String code = ((Node) o).valueOf("@code"); final String label = ((Node) o).valueOf("@label"); final String url = ((Node) o).valueOf("@url"); - final String semrel = ((Node) o).valueOf("@semrel"); + final String semrel = ((Node) o).valueOf("@semanticrelation"); if (StringUtils.isNotBlank(code)) { final EoscIfGuidelines eig = new EoscIfGuidelines(); eig.setCode(code); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java index 
8c9b3caba..ad733bec0 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java @@ -948,6 +948,10 @@ class MappersTest { assertEquals("https://w3id.org/ro-id/0ab171a7-45c5-4194-82d4-850955504bca", inst.getPid().get(0).getValue()); assertEquals("https://w3id.org/ro-id/0ab171a7-45c5-4194-82d4-850955504bca", inst.getUrl().get(0)); assertEquals(1, p.getEoscifguidelines().size()); + assertEquals("EOSC::RO-crate", p.getEoscifguidelines().get(0).getCode()); + assertEquals("EOSC::RO-crate", p.getEoscifguidelines().get(0).getLabel()); + assertEquals("", p.getEoscifguidelines().get(0).getUrl()); + assertEquals("compliesWith", p.getEoscifguidelines().get(0).getSemanticRelation()); } From 2b9a20a4a378d887d801df3eb6958bd20b079276 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Mon, 24 Oct 2022 12:53:47 +0200 Subject: [PATCH 15/55] Changed the way Scholexplorer filter the relationships, I found that filter all relation coming from openCitation is wrong, because we loose a lot of relation than intersect OpenCitation, but they don't come only from there --- .../sx/graph/SparkConvertRDDtoDataset.scala | 77 ++++++++----------- 1 file changed, 34 insertions(+), 43 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkConvertRDDtoDataset.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkConvertRDDtoDataset.scala index 556106180..362cb2028 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkConvertRDDtoDataset.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkConvertRDDtoDataset.scala @@ -116,54 +116,45 @@ object SparkConvertRDDtoDataset { .map(s => mapper.readValue(s, classOf[Relation])) .filter(r => r.getDataInfo != null && !r.getDataInfo.getDeletedbyinference) .filter(r => 
r.getSource.startsWith("50") && r.getTarget.startsWith("50")) - .filter(r => filterRelations(subRelTypeFilter, relClassFilter, r)) - //filter OpenCitations relations - .filter(r => - r.getDataInfo.getProvenanceaction != null && - !"sysimport:crosswalk:opencitations".equals(r.getDataInfo.getProvenanceaction.getClassid) - ) + .filter(r => filterRelations(r)) + //filter OpenCitations relations +// .filter(r => +// r.getDataInfo.getProvenanceaction != null && +// !"sysimport:crosswalk:opencitations".equals(r.getDataInfo.getProvenanceaction.getClassid) +// ) spark.createDataset(rddRelation).as[Relation].write.mode(SaveMode.Overwrite).save(s"$relPath") } - private def filterRelations(subRelTypeFilter: String, relClassFilter: List[String], r: Relation): Boolean = { - if (StringUtils.isNotBlank(subRelTypeFilter)) { - subRelTypeFilter.equalsIgnoreCase(r.getSubRelType) - } else { - !relClassFilter.exists(k => k.equalsIgnoreCase(r.getRelClass)) + private def filterRelations(r: Relation): Boolean = { + + /** * + * We filter relation generated by dedups + * and all the relation that have one single collectedFrom OpenCitation + */ + + val relClassFilter = List( + ModelConstants.MERGES, + ModelConstants.IS_MERGED_IN, + ModelConstants.HAS_AMONG_TOP_N_SIMILAR_DOCS, + ModelConstants.IS_AMONG_TOP_N_SIMILAR_DOCS + ) + if (relClassFilter.exists(k => k.equalsIgnoreCase(r.getRelClass))) + false + else { + if (r.getCollectedfrom == null || r.getCollectedfrom.size() == 0) + false + else if (r.getCollectedfrom.size() > 1) + true + else if ( + r.getCollectedfrom.size() == 1 && r.getCollectedfrom.get(0) != null && "OpenCitations".equalsIgnoreCase( + r.getCollectedfrom.get(0).getValue + ) + ) + false + else + true } } - /* - //TODO: finalise implementation - private def processResult[T<: Result]( - implicit ct: ClassTag[T], - log: Logger, - spark: SparkSession, - sourcePath: String, - entityPath: String, - clazz: Class[T] - ): Unit = { - val entityType = clazz.getSimpleName.toLowerCase - - 
log.info(s"Converting $entityType") - - val mapper = new ObjectMapper() with ScalaObjectMapper - mapper.registerModule(DefaultScalaModule) - - val rdd = spark.sparkContext - .textFile(s"$sourcePath/$entityType") - .map(s => mapper.readValue(s, clazz)) - .filter(r => r.getDataInfo != null && !r.getDataInfo.getDeletedbyinference); - - implicit val encoder: Encoder[T] = Encoders.kryo(clazz) - spark - .createDataset(rdd) - .as[T] - .write - .mode(SaveMode.Overwrite) - .save(s"$entityPath/$entityType") - } - */ - } From 5df9c6396336e895708d00642ebb229825dc7665 Mon Sep 17 00:00:00 2001 From: dimitrispie Date: Thu, 27 Oct 2022 16:44:26 +0300 Subject: [PATCH 16/55] Added fields: totalcost, fundedamount, currency, in project table --- .../dhp/oa/graph/stats/oozie_app/scripts/step11.sql | 4 +++- .../dhp/oa/graph/stats/oozie_app/scripts/step13.sql | 8 ++++---- .../dhp/oa/graph/stats/oozie_app/scripts/step6.sql | 8 ++++++-- 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step11.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step11.sql index d699b68c3..41c3ed751 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step11.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step11.sql @@ -42,7 +42,9 @@ SELECT p.id, CASE WHEN prr2.id IS NULL THEN 0 ELSE prr2.dp END AS delayedpubs, p.callidentifier, p.code, - p.totalcost + p.totalcost, + p.fundedamount, + p.currency FROM ${stats_db_name}.project_tmp p LEFT JOIN (SELECT pr.id, count(distinct pr.result) AS np FROM ${stats_db_name}.project_results pr diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step13.sql 
b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step13.sql index aee66fd5e..24e1a1355 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step13.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step13.sql @@ -59,7 +59,7 @@ UNION ALL SELECT * FROM ${stats_db_name}.otherresearchproduct_sources; -create table ${stats_db_name}.result_orcid STORED AS PARQUET as +CREATE TABLE IF NOT EXISTS ${stats_db_name}.result_orcid STORED AS PARQUET as select distinct res.id, regexp_replace(res.orcid, 'http://orcid.org/' ,'') as orcid from ( SELECT substr(res.id, 4) as id, auth_pid.value as orcid @@ -69,7 +69,7 @@ from ( LATERAL VIEW explode(auth.pid.qualifier.classid) apt as author_pid_type WHERE res.datainfo.deletedbyinference = FALSE and res.datainfo.invisible = FALSE and author_pid_type = 'orcid') as res; -create table ${stats_db_name}.result_result stored as parquet as +CREATE TABLE IF NOT EXISTS ${stats_db_name}.result_result stored as parquet as select substr(rel.source, 4) as source, substr(rel.target, 4) as target, relclass, subreltype from ${openaire_db_name}.relation rel join ${openaire_db_name}.result r1 on rel.source=r1.id @@ -82,7 +82,7 @@ where reltype='resultResult' and r2.resulttype.classname != 'other' and rel.datainfo.deletedbyinference=false and rel.datainfo.invisible = FALSE; -create table ${stats_db_name}.result_citations_oc stored as parquet as +CREATE TABLE IF NOT EXISTS ${stats_db_name}.result_citations_oc stored as parquet as select substr(target, 4) as id, count(distinct substr(source, 4)) as citations from ${openaire_db_name}.relation rel join ${openaire_db_name}.result r1 on rel.source=r1.id @@ -97,7 +97,7 @@ where relClass='Cites' and rel.datainfo.provenanceaction.classid = 'sysimport:cr and rel.datainfo.deletedbyinference=false and rel.datainfo.invisible = FALSE group by substr(target, 
4); -create table ${stats_db_name}.result_references_oc stored as parquet as +CREATE TABLE IF NOT EXISTS ${stats_db_name}.result_references_oc stored as parquet as select substr(source, 4) as id, count(distinct substr(target, 4)) as references from ${openaire_db_name}.relation rel join ${openaire_db_name}.result r1 on rel.source=r1.id diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step6.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step6.sql index 5461afde6..c31180c14 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step6.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step6.sql @@ -48,7 +48,9 @@ CREATE TABLE ${stats_db_name}.project_tmp delayedpubs INT, callidentifier STRING, code STRING, - totalcost FLOAT + totalcost FLOAT, + fundedamount FLOAT, + currency STRING ) CLUSTERED BY (id) INTO 100 buckets stored AS orc tblproperties ('transactional' = 'true'); INSERT INTO ${stats_db_name}.project_tmp @@ -72,7 +74,9 @@ SELECT substr(p.id, 4) AS id, 0 AS delayedpubs, p.callidentifier.value AS callidentifier, p.code.value AS code, - p.totalcost AS totalcost + p.totalcost AS totalcost, + p.fundedamount AS fundedamount, + p.currency.value AS currency FROM ${openaire_db_name}.project p WHERE p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; From 7861c472e0c8e4c3084a04721dafc736111c4963 Mon Sep 17 00:00:00 2001 From: dimitrispie Date: Fri, 28 Oct 2022 19:00:32 +0300 Subject: [PATCH 17/55] Hive memory parameters --- .../eu/dnetlib/dhp/oa/graph/stats/oozie_app/config-default.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/config-default.xml 
b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/config-default.xml index 9331d4ac5..63fc84d75 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/config-default.xml +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/config-default.xml @@ -21,7 +21,7 @@ hive_jdbc_url - jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000 + jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000/;UseNativeQuery=1;?spark.executor.memory=19166291558;spark.yarn.executor.memoryOverhead=3225;spark.driver.memory=11596411699;spark.yarn.driver.memoryOverhead=1228 oozie.wf.workflow.notification.url From 7fda05e3803bebd087c382411d88914e21c6f2db Mon Sep 17 00:00:00 2001 From: dimitrispie Date: Tue, 1 Nov 2022 13:59:40 +0200 Subject: [PATCH 18/55] Added Autonomous University of Barcelona --- .../graph/stats/oozie_app/scripts/step20-createMonitorDB.sql | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql index 2505c3a34..5dbedfa67 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql @@ -48,7 +48,8 @@ create table TARGET.result stored as parquet as 'openorgs____::3e8d1f8c3f6cd7f418b09f1f58b4873b', -- Aristotle University of Thessaloniki 'openorgs____::3fcef6e1c469c10f2a84b281372c9814', -- World Bank 'openorgs____::1698a2eb1885ef8adb5a4a969e745ad3', -- École des Ponts ParisTech - 'openorgs____::e15adb13c4dadd49de4d35c39b5da93a' -- Nanyang Technological University + 
'openorgs____::e15adb13c4dadd49de4d35c39b5da93a', -- Nanyang Technological University + 'openorgs____::4b34103bde246228fcd837f5f1bf4212' -- Autonomous University of Barcelona ) )) foo; compute stats TARGET.result; From 992fc5b628ccad566acf218019cf576408564f73 Mon Sep 17 00:00:00 2001 From: dimitrispie Date: Thu, 3 Nov 2022 11:02:18 +0200 Subject: [PATCH 19/55] Added McMaster University Institution --- .../graph/stats/oozie_app/scripts/step20-createMonitorDB.sql | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql index 5dbedfa67..2bdcbfa3d 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql @@ -49,7 +49,8 @@ create table TARGET.result stored as parquet as 'openorgs____::3fcef6e1c469c10f2a84b281372c9814', -- World Bank 'openorgs____::1698a2eb1885ef8adb5a4a969e745ad3', -- École des Ponts ParisTech 'openorgs____::e15adb13c4dadd49de4d35c39b5da93a', -- Nanyang Technological University - 'openorgs____::4b34103bde246228fcd837f5f1bf4212' -- Autonomous University of Barcelona + 'openorgs____::4b34103bde246228fcd837f5f1bf4212', -- Autonomous University of Barcelona + 'openorgs____::72ec75fcfc4e0df1a76dc4c49007fceb' -- McMaster University ) )) foo; compute stats TARGET.result; From 5af5a8ae42c5c3303bad4f0eeaccee518ef0480f Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 9 Nov 2022 14:20:59 +0100 Subject: [PATCH 20/55] added IdentifierComparator --- .../dhp/oa/dedup/DedupRecordFactory.java | 21 ++++- .../eu/dnetlib/dhp/oa/dedup/IdGenerator.java | 4 + .../dhp/oa/dedup/IdentifierComparator.java | 
81 +++++++++++++++++++ .../dhp/oa/dedup/model/Identifier.java | 53 +----------- .../dhp/oa/dedup/SparkOpenorgsDedupTest.java | 4 +- .../dnetlib/dhp/oa/dedup/SparkStatsTest.java | 4 +- .../src/test/resources/log4j.properties | 25 ++++++ 7 files changed, 134 insertions(+), 58 deletions(-) create mode 100644 dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/IdentifierComparator.java create mode 100644 dhp-workflows/dhp-dedup-openaire/src/test/resources/log4j.properties diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java index d345cf98f..c3b6751ba 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java @@ -2,8 +2,10 @@ package eu.dnetlib.dhp.oa.dedup; import java.util.Collection; +import java.util.Comparator; import java.util.Iterator; import java.util.List; +import java.util.stream.Collectors; import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.api.java.function.MapGroupsFunction; @@ -13,11 +15,14 @@ import org.apache.spark.sql.SparkSession; import com.fasterxml.jackson.databind.DeserializationFeature; import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.collect.Iterators; import com.google.common.collect.Lists; +import eu.dnetlib.dhp.oa.dedup.model.Identifier; import eu.dnetlib.dhp.oa.merge.AuthorMerger; import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; import scala.Tuple2; public class DedupRecordFactory { @@ -82,11 +87,19 @@ public class DedupRecordFactory { final Collection dates = Lists.newArrayList(); final List> authors = Lists.newArrayList(); - entities - .forEachRemaining( - t -> { - T 
duplicate = t._2(); + final Comparator> idComparator = new IdentifierComparator().reversed(); + final List entityList = Lists + .newArrayList(entities) + .stream() + .map(t -> Identifier.newInstance(t._2())) + .sorted(idComparator) + .map(Identifier::getEntity) + .collect(Collectors.toList()); + + entityList + .forEach( + duplicate -> { entity.mergeFrom(duplicate); if (ModelSupport.isSubClass(duplicate, Result.class)) { Result r1 = (Result) duplicate; diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/IdGenerator.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/IdGenerator.java index 81cd30f88..7e0d66062 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/IdGenerator.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/IdGenerator.java @@ -18,6 +18,10 @@ public class IdGenerator implements Serializable { if (pids == null || pids.isEmpty()) return defaultID; + return generateId(pids); + } + + private static String generateId(List> pids) { Identifier bp = pids .stream() .min(Identifier::compareTo) diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/IdentifierComparator.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/IdentifierComparator.java new file mode 100644 index 000000000..ba4e31128 --- /dev/null +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/IdentifierComparator.java @@ -0,0 +1,81 @@ + +package eu.dnetlib.dhp.oa.dedup; + +import java.util.Comparator; +import java.util.List; +import java.util.Optional; +import java.util.Set; +import java.util.stream.Collectors; + +import com.google.common.collect.Sets; + +import eu.dnetlib.dhp.oa.dedup.model.Identifier; +import eu.dnetlib.dhp.schema.common.EntityType; +import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.oaf.DataInfo; +import eu.dnetlib.dhp.schema.oaf.KeyValue; +import 
eu.dnetlib.dhp.schema.oaf.OafEntity; +import eu.dnetlib.dhp.schema.oaf.StructuredProperty; +import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; +import eu.dnetlib.dhp.schema.oaf.utils.PidComparator; +import eu.dnetlib.dhp.schema.oaf.utils.PidType; + +public class IdentifierComparator implements Comparator> { + + public static int compareIdentifiers(Identifier left, Identifier right) { + return new IdentifierComparator<>().compare(left, right); + } + + @Override + public int compare(Identifier left, Identifier i) { + // priority in comparisons: 1) pidtype, 2) collectedfrom (depending on the entity type) , 3) date 4) + // alphabetical order of the originalID + + Set lKeys = Optional + .ofNullable(left.getCollectedFrom()) + .map(c -> c.stream().map(KeyValue::getKey).collect(Collectors.toSet())) + .orElse(Sets.newHashSet()); + + final Optional> cf = Optional.ofNullable(i.getCollectedFrom()); + Set rKeys = cf + .map(c -> c.stream().map(KeyValue::getKey).collect(Collectors.toSet())) + .orElse(Sets.newHashSet()); + + if (left.getPidType().compareTo(i.getPidType()) == 0) { // same type + if (left.getEntityType() == EntityType.publication) { + if (isFromDatasourceID(lKeys, ModelConstants.CROSSREF_ID) + && !isFromDatasourceID(rKeys, ModelConstants.CROSSREF_ID)) + return -1; + if (isFromDatasourceID(rKeys, ModelConstants.CROSSREF_ID) + && !isFromDatasourceID(lKeys, ModelConstants.CROSSREF_ID)) + return 1; + } + if (left.getEntityType() == EntityType.dataset) { + if (isFromDatasourceID(lKeys, ModelConstants.DATACITE_ID) + && !isFromDatasourceID(rKeys, ModelConstants.DATACITE_ID)) + return -1; + if (isFromDatasourceID(rKeys, ModelConstants.DATACITE_ID) + && !isFromDatasourceID(lKeys, ModelConstants.DATACITE_ID)) + return 1; + } + + if (left.getDate().compareTo(i.getDate()) == 0) {// same date + // we need to take the alphabetically lower id + return left.getOriginalID().compareTo(i.getOriginalID()); + } else + // we need to take the elder date + return 
left.getDate().compareTo(i.getDate()); + } else { + return new PidComparator<>(left.getEntity()).compare(toSP(left.getPidType()), toSP(i.getPidType())); + } + } + + public boolean isFromDatasourceID(Set collectedFrom, String dsId) { + return collectedFrom.contains(dsId); + } + + private StructuredProperty toSP(PidType pidType) { + return OafMapperUtils.structuredProperty("", pidType.toString(), pidType.toString(), "", "", new DataInfo()); + } + +} diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/model/Identifier.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/model/Identifier.java index a25a853ef..0cba4fc3b 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/model/Identifier.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/model/Identifier.java @@ -11,6 +11,7 @@ import org.apache.commons.lang3.StringUtils; import com.google.common.collect.Sets; import eu.dnetlib.dhp.oa.dedup.DatePicker; +import eu.dnetlib.dhp.oa.dedup.IdentifierComparator; import eu.dnetlib.dhp.schema.common.EntityType; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelSupport; @@ -83,60 +84,12 @@ public class Identifier implements Serializable, Comparable return entity.getId(); } - private PidType getPidType() { + public PidType getPidType() { return PidType.tryValueOf(StringUtils.substringBefore(StringUtils.substringAfter(entity.getId(), "|"), "_")); } @Override public int compareTo(Identifier i) { - // priority in comparisons: 1) pidtype, 2) collectedfrom (depending on the entity type) , 3) date 4) - // alphabetical order of the originalID - - Set lKeys = Optional - .ofNullable(getCollectedFrom()) - .map(c -> c.stream().map(KeyValue::getKey).collect(Collectors.toSet())) - .orElse(Sets.newHashSet()); - - final Optional> cf = Optional.ofNullable(i.getCollectedFrom()); - Set rKeys = cf - .map(c -> 
c.stream().map(KeyValue::getKey).collect(Collectors.toSet())) - .orElse(Sets.newHashSet()); - - if (this.getPidType().compareTo(i.getPidType()) == 0) { // same type - if (getEntityType() == EntityType.publication) { - if (isFromDatasourceID(lKeys, ModelConstants.CROSSREF_ID) - && !isFromDatasourceID(rKeys, ModelConstants.CROSSREF_ID)) - return -1; - if (isFromDatasourceID(rKeys, ModelConstants.CROSSREF_ID) - && !isFromDatasourceID(lKeys, ModelConstants.CROSSREF_ID)) - return 1; - } - if (getEntityType() == EntityType.dataset) { - if (isFromDatasourceID(lKeys, ModelConstants.DATACITE_ID) - && !isFromDatasourceID(rKeys, ModelConstants.DATACITE_ID)) - return -1; - if (isFromDatasourceID(rKeys, ModelConstants.DATACITE_ID) - && !isFromDatasourceID(lKeys, ModelConstants.DATACITE_ID)) - return 1; - } - - if (this.getDate().compareTo(i.getDate()) == 0) {// same date - // we need to take the alphabetically lower id - return this.getOriginalID().compareTo(i.getOriginalID()); - } else - // we need to take the elder date - return this.getDate().compareTo(i.getDate()); - } else { - return new PidComparator<>(getEntity()).compare(toSP(getPidType()), toSP(i.getPidType())); - } - - } - - private StructuredProperty toSP(PidType pidType) { - return OafMapperUtils.structuredProperty("", pidType.toString(), pidType.toString(), "", "", new DataInfo()); - } - - public boolean isFromDatasourceID(Set collectedFrom, String dsId) { - return collectedFrom.contains(dsId); + return IdentifierComparator.compareIdentifiers(this, i); } } diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkOpenorgsDedupTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkOpenorgsDedupTest.java index 9312d83b1..88c28ab2f 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkOpenorgsDedupTest.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkOpenorgsDedupTest.java @@ -143,7 
+143,7 @@ public class SparkOpenorgsDedupTest implements Serializable { .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "organization")) .count(); - assertEquals(288, orgs_simrel); + assertEquals(290, orgs_simrel); } @Test @@ -172,7 +172,7 @@ public class SparkOpenorgsDedupTest implements Serializable { .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "organization")) .count(); - assertEquals(324, orgs_simrel); + assertEquals(326, orgs_simrel); } @Test diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkStatsTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkStatsTest.java index 1ba2c717c..b33b627e7 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkStatsTest.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkStatsTest.java @@ -168,11 +168,11 @@ public class SparkStatsTest implements Serializable { .textFile(testOutputBasePath + "/" + testActionSetId + "/otherresearchproduct_blockstats") .count(); - assertEquals(477, orgs_blocks); + assertEquals(480, orgs_blocks); assertEquals(295, pubs_blocks); assertEquals(122, sw_blocks); assertEquals(191, ds_blocks); - assertEquals(171, orp_blocks); + assertEquals(178, orp_blocks); } @AfterAll diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/resources/log4j.properties b/dhp-workflows/dhp-dedup-openaire/src/test/resources/log4j.properties new file mode 100644 index 000000000..ce37270c6 --- /dev/null +++ b/dhp-workflows/dhp-dedup-openaire/src/test/resources/log4j.properties @@ -0,0 +1,25 @@ +# Root logger option +log4j.rootLogger=DEBUG, stdout + +# Direct log messages to stdout +log4j.appender.stdout=org.apache.log4j.ConsoleAppender +log4j.appender.stdout.Target=System.out +log4j.appender.stdout.layout=org.apache.log4j.PatternLayout +log4j.appender.stdout.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss} %-5p %c{1}:%L - %m%n + +# 
Change this to set Spark log level +log4j.logger.org.apache.spark=ERROR +log4j.rootCategory=WARN + +# Silence akka remoting +log4j.logger.Remoting=WARN + +# Ignore messages below warning level from Jetty, because it's a bit verbose +log4j.logger.org.eclipse.jetty=WARN + +log4j.logger.org.apache.hadoop.mapreduce.lib.output.FileOutputCommitterFactory=WARN +log4j.logger.org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter=WARN +log4j.logger.org.apache.parquet.hadoop.ParquetOutputFormat=WARN +log4j.logger.org.apache.parquet.hadoop.InternalParquetRecordWriter=WARN +log4j.logger.org.apache.hadoop.io.compress.CodecPool=WARN +log4j.logger.org.apache.parquet.hadoop.codec.CodecConfig=WARN \ No newline at end of file From ddff0e8999b392bdec7b9d6263e5c53e16a6bf59 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 11 Nov 2022 16:10:25 +0100 Subject: [PATCH 21/55] merging duplicates using IdentifierComparator --- .../dhp/oa/dedup/DedupRecordFactory.java | 35 +- .../dhp/oa/dedup/EntityMergerTest.java | 15 +- .../oa/dedup/SparkDedupPublicationTest.java | 389 ++++++++++++++++++ .../dnetlib/dhp/oa/dedup/SparkDedupTest.java | 210 ++++++---- .../dedup/entities/publication/publication.gz | Bin 578793 -> 581926 bytes .../entities2/publication/publication.gz | Bin 0 -> 9056 bytes .../mock_orchestrator_publication.xml | 24 ++ .../src/test/resources/log4j.properties | 28 +- 8 files changed, 590 insertions(+), 111 deletions(-) create mode 100644 dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupPublicationTest.java create mode 100644 dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/entities2/publication/publication.gz create mode 100644 dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/profiles/mock_orchestrator_publication.xml diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java 
b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java index c3b6751ba..f9fc8a21a 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java @@ -1,12 +1,12 @@ package eu.dnetlib.dhp.oa.dedup; -import java.util.Collection; -import java.util.Comparator; -import java.util.Iterator; -import java.util.List; +import java.lang.reflect.InvocationTargetException; +import java.util.*; import java.util.stream.Collectors; +import org.apache.commons.beanutils.BeanUtils; +import org.apache.commons.lang3.StringUtils; import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.api.java.function.MapGroupsFunction; import org.apache.spark.sql.Dataset; @@ -15,14 +15,12 @@ import org.apache.spark.sql.SparkSession; import com.fasterxml.jackson.databind.DeserializationFeature; import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.common.collect.Iterators; import com.google.common.collect.Lists; import eu.dnetlib.dhp.oa.dedup.model.Identifier; import eu.dnetlib.dhp.oa.merge.AuthorMerger; import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.oaf.*; -import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; import scala.Tuple2; public class DedupRecordFactory { @@ -79,23 +77,25 @@ public class DedupRecordFactory { public static T entityMerger( String id, Iterator> entities, long ts, DataInfo dataInfo, Class clazz) - throws IllegalAccessException, InstantiationException { + throws IllegalAccessException, InstantiationException, InvocationTargetException { - T entity = clazz.newInstance(); - entity.setDataInfo(dataInfo); + final Comparator> idComparator = new IdentifierComparator<>(); - final Collection dates = Lists.newArrayList(); - final List> authors = Lists.newArrayList(); - - final Comparator> idComparator = new 
IdentifierComparator().reversed(); - - final List entityList = Lists + final LinkedList entityList = Lists .newArrayList(entities) .stream() .map(t -> Identifier.newInstance(t._2())) .sorted(idComparator) .map(Identifier::getEntity) - .collect(Collectors.toList()); + .collect(Collectors.toCollection(LinkedList::new)); + + final T entity = clazz.newInstance(); + final T first = entityList.removeFirst(); + + BeanUtils.copyProperties(entity, first); + + final Collection dates = Lists.newArrayList(); + final List> authors = Lists.newArrayList(); entityList .forEach( @@ -103,12 +103,11 @@ public class DedupRecordFactory { entity.mergeFrom(duplicate); if (ModelSupport.isSubClass(duplicate, Result.class)) { Result r1 = (Result) duplicate; - if (r1.getAuthor() != null && !r1.getAuthor().isEmpty()) + if (r1.getAuthor() != null && StringUtils.isNotBlank(r1.getDateofacceptance().getValue())) authors.add(r1.getAuthor()); if (r1.getDateofacceptance() != null) dates.add(r1.getDateofacceptance().getValue()); } - }); // set authors and date diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/EntityMergerTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/EntityMergerTest.java index aa3722ce5..c9cfb8cb2 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/EntityMergerTest.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/EntityMergerTest.java @@ -7,6 +7,7 @@ import java.io.BufferedReader; import java.io.FileReader; import java.io.IOException; import java.io.Serializable; +import java.lang.reflect.InvocationTargetException; import java.nio.file.Paths; import java.util.*; import java.util.stream.Collectors; @@ -54,7 +55,7 @@ class EntityMergerTest implements Serializable { } @Test - void softwareMergerTest() throws InstantiationException, IllegalAccessException { + void softwareMergerTest() throws InstantiationException, IllegalAccessException, 
InvocationTargetException { List> softwares = readSample( testEntityBasePath + "/software_merge.json", Software.class); @@ -69,7 +70,7 @@ class EntityMergerTest implements Serializable { } @Test - void publicationMergerTest() throws InstantiationException, IllegalAccessException { + void publicationMergerTest() throws InstantiationException, IllegalAccessException, InvocationTargetException { Publication pub_merged = DedupRecordFactory .entityMerger(dedupId, publications.iterator(), 0, dataInfo, Publication.class); @@ -134,7 +135,7 @@ class EntityMergerTest implements Serializable { } @Test - void publicationMergerTest2() throws InstantiationException, IllegalAccessException { + void publicationMergerTest2() throws InstantiationException, IllegalAccessException, InvocationTargetException { Publication pub_merged = DedupRecordFactory .entityMerger(dedupId, publications2.iterator(), 0, dataInfo, Publication.class); @@ -146,7 +147,7 @@ class EntityMergerTest implements Serializable { } @Test - void publicationMergerTest3() throws InstantiationException, IllegalAccessException { + void publicationMergerTest3() throws InstantiationException, IllegalAccessException, InvocationTargetException { Publication pub_merged = DedupRecordFactory .entityMerger(dedupId, publications3.iterator(), 0, dataInfo, Publication.class); @@ -156,7 +157,8 @@ class EntityMergerTest implements Serializable { } @Test - void publicationMergerTest4() throws InstantiationException, IllegalStateException, IllegalAccessException { + void publicationMergerTest4() + throws InstantiationException, IllegalStateException, IllegalAccessException, InvocationTargetException { Publication pub_merged = DedupRecordFactory .entityMerger(dedupId, publications4.iterator(), 0, dataInfo, Publication.class); @@ -166,7 +168,8 @@ class EntityMergerTest implements Serializable { } @Test - void publicationMergerTest5() throws InstantiationException, IllegalStateException, IllegalAccessException { + void 
publicationMergerTest5() + throws InstantiationException, IllegalStateException, IllegalAccessException, InvocationTargetException { System.out .println( diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupPublicationTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupPublicationTest.java new file mode 100644 index 000000000..c657d1865 --- /dev/null +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupPublicationTest.java @@ -0,0 +1,389 @@ + +package eu.dnetlib.dhp.oa.dedup; + +import static java.nio.file.Files.createTempDirectory; + +import static org.apache.spark.sql.functions.count; +import static org.junit.jupiter.api.Assertions.*; +import static org.mockito.Mockito.lenient; + +import java.io.File; +import java.io.IOException; +import java.io.Serializable; +import java.net.URISyntaxException; +import java.nio.file.Paths; +import java.util.*; +import java.util.stream.Collectors; + +import com.fasterxml.jackson.core.JsonProcessingException; +import org.apache.commons.io.FileUtils; +import org.apache.commons.io.IOUtils; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.api.java.function.FilterFunction; +import org.apache.spark.api.java.function.FlatMapFunction; +import org.apache.spark.api.java.function.MapFunction; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.SparkSession; +import org.junit.jupiter.api.*; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.Mock; +import org.mockito.Mockito; +import org.mockito.junit.jupiter.MockitoExtension; + +import com.fasterxml.jackson.databind.DeserializationFeature; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.collect.Sets; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import 
eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; +import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; + +@ExtendWith(MockitoExtension.class) +@TestMethodOrder(MethodOrderer.OrderAnnotation.class) +public class SparkDedupPublicationTest implements Serializable { + + @Mock(serializable = true) + ISLookUpService isLookUpService; + + private static SparkSession spark; + private static JavaSparkContext jsc; + + private static String testGraphBasePath; + private static String testOutputBasePath; + private static String testDedupGraphBasePath; + private static final String testActionSetId = "test-orchestrator"; + + @BeforeAll + public static void cleanUp() throws IOException, URISyntaxException { + + testGraphBasePath = Paths + .get(SparkDedupPublicationTest.class.getResource("/eu/dnetlib/dhp/dedup/entities2").toURI()) + .toFile() + .getAbsolutePath(); + testOutputBasePath = createTempDirectory(SparkDedupPublicationTest.class.getSimpleName() + "-") + .toAbsolutePath() + .toString(); + + testDedupGraphBasePath = createTempDirectory(SparkDedupPublicationTest.class.getSimpleName() + "-") + .toAbsolutePath() + .toString(); + + FileUtils.deleteDirectory(new File(testOutputBasePath)); + FileUtils.deleteDirectory(new File(testDedupGraphBasePath)); + + final SparkConf conf = new SparkConf(); + conf.set("spark.sql.shuffle.partitions", "10"); + spark = SparkSession + .builder() + .appName(SparkDedupPublicationTest.class.getSimpleName()) + .master("local[*]") + .config(conf) + .getOrCreate(); + + jsc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + } + + @BeforeEach + public void setUp() throws IOException, ISLookUpException { + + lenient() + .when(isLookUpService.getResourceProfileByQuery(Mockito.contains(testActionSetId))) + .thenReturn(classPathResourceAsString("/eu/dnetlib/dhp/dedup/profiles/mock_orchestrator_publication.xml")); + + lenient() + 
.when(isLookUpService.getResourceProfileByQuery(Mockito.contains("publication"))) + .thenReturn(classPathResourceAsString("/eu/dnetlib/dhp/dedup/conf/pub.curr.conf.json")); + } + + @Test + @Order(1) + void createSimRelsTest() throws Exception { + + ArgumentApplicationParser parser = new ArgumentApplicationParser( + classPathResourceAsString("/eu/dnetlib/dhp/oa/dedup/createSimRels_parameters.json")); + + parser + .parseArgument( + new String[] { + "--graphBasePath", testGraphBasePath, + "--actionSetId", testActionSetId, + "--isLookUpUrl", "lookupurl", + "--workingPath", testOutputBasePath, + "--numPartitions", "5" + }); + + new SparkCreateSimRels(parser, spark).run(isLookUpService); + + long pubs_simrel = spark + .read() + .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "publication")) + .count(); + + assertEquals(62, pubs_simrel); + } + + @Test + @Order(2) + void cutMergeRelsTest() throws Exception { + + ArgumentApplicationParser parser = new ArgumentApplicationParser( + classPathResourceAsString("/eu/dnetlib/dhp/oa/dedup/createCC_parameters.json")); + + parser + .parseArgument( + new String[] { + "--graphBasePath", testGraphBasePath, + "--actionSetId", testActionSetId, + "--isLookUpUrl", "lookupurl", + "--workingPath", testOutputBasePath, + "--cutConnectedComponent", "3" + }); + + new SparkCreateMergeRels(parser, spark).run(isLookUpService); + + long pubs_mergerel = spark + .read() + .load(testOutputBasePath + "/" + testActionSetId + "/publication_mergerel") + .as(Encoders.bean(Relation.class)) + .filter((FilterFunction) r -> r.getRelClass().equalsIgnoreCase("merges")) + .groupBy("source") + .agg(count("target").alias("cnt")) + .select("source", "cnt") + .where("cnt > 3") + .count(); + + assertEquals(0, pubs_mergerel); + + FileUtils.deleteDirectory(new File(testOutputBasePath + "/" + testActionSetId + "/publication_mergerel")); + } + + @Test + @Order(3) + void createMergeRelsTest() throws Exception { + + ArgumentApplicationParser parser = 
new ArgumentApplicationParser( + classPathResourceAsString("/eu/dnetlib/dhp/oa/dedup/createCC_parameters.json")); + + parser + .parseArgument( + new String[] { + "--graphBasePath", testGraphBasePath, + "--actionSetId", testActionSetId, + "--isLookUpUrl", "lookupurl", + "--workingPath", testOutputBasePath + }); + + new SparkCreateMergeRels(parser, spark).run(isLookUpService); + + final Dataset pubs = spark + .read() + .load(testOutputBasePath + "/" + testActionSetId + "/publication_mergerel") + .as(Encoders.bean(Relation.class)); + + final List merges = pubs + .filter("source == '50|doi_dedup___::d5021b53204e4fdeab6ff5d5bc468032'") + .collectAsList(); + assertEquals(3, merges.size()); + Set dups = Sets + .newHashSet( + "50|doi_________::3b1d0d8e8f930826665df9d6b82fbb73", + "50|doi_________::d5021b53204e4fdeab6ff5d5bc468032", + "50|arXiv_______::c93aeb433eb90ed7a86e29be00791b7c"); + merges.forEach(r -> { + assertEquals(ModelConstants.RESULT_RESULT, r.getRelType()); + assertEquals(ModelConstants.DEDUP, r.getSubRelType()); + assertEquals(ModelConstants.MERGES, r.getRelClass()); + assertTrue(dups.contains(r.getTarget())); + }); + + final List mergedIn = pubs + .filter("target == '50|doi_dedup___::d5021b53204e4fdeab6ff5d5bc468032'") + .collectAsList(); + assertEquals(3, mergedIn.size()); + mergedIn.forEach(r -> { + assertEquals(ModelConstants.RESULT_RESULT, r.getRelType()); + assertEquals(ModelConstants.DEDUP, r.getSubRelType()); + assertEquals(ModelConstants.IS_MERGED_IN, r.getRelClass()); + assertTrue(dups.contains(r.getSource())); + }); + + assertEquals(24, pubs.count()); + } + + @Test + @Order(4) + void createDedupRecordTest() throws Exception { + + ArgumentApplicationParser parser = new ArgumentApplicationParser( + classPathResourceAsString("/eu/dnetlib/dhp/oa/dedup/createDedupRecord_parameters.json")); + parser + .parseArgument( + new String[] { + "--graphBasePath", testGraphBasePath, + "--actionSetId", testActionSetId, + "--isLookUpUrl", "lookupurl", + 
"--workingPath", testOutputBasePath + }); + + new SparkCreateDedupRecord(parser, spark).run(isLookUpService); + + final ObjectMapper mapper = new ObjectMapper() + .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); + + final Dataset roots = spark + .read() + .textFile(testOutputBasePath + "/" + testActionSetId + "/publication_deduprecord") + .map( + (MapFunction) value -> mapper.readValue(value, Publication.class), + Encoders.bean(Publication.class)); + + assertEquals(2, roots.count()); + + final Dataset pubs = spark + .read() + .textFile(DedupUtility.createEntityPath(testGraphBasePath, "publication")) + .map( + (MapFunction) value -> mapper.readValue(value, Publication.class), + Encoders.bean(Publication.class)); + + verifyRoot_case_1(roots, pubs); + verifyRoot_case_2(roots, pubs); + } + + private static void verifyRoot_case_1(Dataset roots, Dataset pubs) { + Publication root = roots + .filter("id = '50|doi_dedup___::d5021b53204e4fdeab6ff5d5bc468032'") + .first(); + assertNotNull(root); + + Publication crossref_duplicate = pubs + .filter("id = '50|doi_________::d5021b53204e4fdeab6ff5d5bc468032'") + .collectAsList() + .get(0); + + assertEquals(crossref_duplicate.getJournal().getName(), root.getJournal().getName()); + assertEquals(crossref_duplicate.getJournal().getIssnPrinted(), root.getJournal().getIssnPrinted()); + assertEquals(crossref_duplicate.getPublisher().getValue(), root.getPublisher().getValue()); + + Set rootPids = root + .getPid() + .stream() + .map(StructuredProperty::getValue) + .collect(Collectors.toCollection(HashSet::new)); + Set dupPids = crossref_duplicate + .getPid() + .stream() + .map(StructuredProperty::getValue) + .collect(Collectors.toCollection(HashSet::new)); + + assertFalse(Sets.intersection(rootPids, dupPids).isEmpty()); + assertTrue(rootPids.contains("10.1109/jstqe.2022.3205716")); + + Optional instance_cr = root + .getInstance() + .stream() + .filter(i -> i.getCollectedfrom().getValue().equals("Crossref")) + 
.findFirst(); + assertTrue(instance_cr.isPresent()); + assertEquals("OPEN", instance_cr.get().getAccessright().getClassid()); + assertEquals("Open Access", instance_cr.get().getAccessright().getClassname()); + assertEquals(OpenAccessRoute.hybrid, instance_cr.get().getAccessright().getOpenAccessRoute()); + assertEquals( + "IEEE Journal of Selected Topics in Quantum Electronics", instance_cr.get().getHostedby().getValue()); + assertEquals("0001", instance_cr.get().getInstancetype().getClassid()); + assertEquals("Article", instance_cr.get().getInstancetype().getClassname()); + } + + private void verifyRoot_case_2(Dataset roots, Dataset pubs) throws JsonProcessingException { + Publication root = roots + .filter("id = '50|doi_dedup___::18aff3b55fb6876466a5d4bd82434885'") + .first(); + assertNotNull(root); + + Publication crossref_duplicate = pubs + .filter("id = '50|doi_________::18aff3b55fb6876466a5d4bd82434885'") + .first(); + + //System.err.println(new ObjectMapper().writeValueAsString(root)); + + assertEquals(crossref_duplicate.getJournal().getName(), root.getJournal().getName()); + assertEquals(crossref_duplicate.getJournal().getIssnOnline(), root.getJournal().getIssnOnline()); + assertEquals(crossref_duplicate.getJournal().getVol(), root.getJournal().getVol()); + + assertEquals(crossref_duplicate.getPublisher().getValue(), root.getPublisher().getValue()); + + Set dups_cf = pubs + .collectAsList() + .stream() + .flatMap(p -> p.getCollectedfrom().stream()) + .map(KeyValue::getValue) + .collect(Collectors.toCollection(HashSet::new)); + + Set root_cf = root + .getCollectedfrom() + .stream() + .map(KeyValue::getValue) + .collect(Collectors.toCollection(HashSet::new)); + + assertTrue(Sets.difference(root_cf, dups_cf).isEmpty()); + } + + @Test + @Order(6) + void updateEntityTest() throws Exception { + + ArgumentApplicationParser parser = new ArgumentApplicationParser( + classPathResourceAsString("/eu/dnetlib/dhp/oa/dedup/updateEntity_parameters.json")); + parser + 
.parseArgument( + new String[] { + "-i", testGraphBasePath, "-w", testOutputBasePath, "-o", testDedupGraphBasePath + }); + + new SparkUpdateEntity(parser, spark).run(isLookUpService); + + long publications = jsc.textFile(testDedupGraphBasePath + "/publication").count(); + + long mergedPubs = spark + .read() + .load(testOutputBasePath + "/" + testActionSetId + "/publication_mergerel") + .as(Encoders.bean(Relation.class)) + .where("relClass=='merges'") + .javaRDD() + .map(Relation::getTarget) + .distinct() + .count(); + + assertEquals(14, publications); + + long deletedPubs = jsc + .textFile(testDedupGraphBasePath + "/publication") + .filter(this::isDeletedByInference) + .count(); + + assertEquals(mergedPubs, deletedPubs); + } + + @AfterAll + public static void finalCleanUp() throws IOException { + FileUtils.deleteDirectory(new File(testOutputBasePath)); + FileUtils.deleteDirectory(new File(testDedupGraphBasePath)); + } + + public boolean isDeletedByInference(String s) { + return s.contains("\"deletedbyinference\":true"); + } + + private static String classPathResourceAsString(String path) throws IOException { + return IOUtils + .toString( + SparkDedupPublicationTest.class + .getResourceAsStream(path)); + } + +} diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java index 9c9ec43d5..3de14f577 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java @@ -4,8 +4,7 @@ package eu.dnetlib.dhp.oa.dedup; import static java.nio.file.Files.createTempDirectory; import static org.apache.spark.sql.functions.count; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.*; import static 
org.mockito.Mockito.lenient; import java.io.File; @@ -14,7 +13,11 @@ import java.io.IOException; import java.io.Serializable; import java.net.URISyntaxException; import java.nio.file.Paths; +import java.util.HashSet; import java.util.List; +import java.util.Optional; +import java.util.Set; +import java.util.stream.Collectors; import org.apache.commons.io.FileUtils; import org.apache.commons.io.IOUtils; @@ -35,10 +38,13 @@ import org.mockito.Mock; import org.mockito.Mockito; import org.mockito.junit.jupiter.MockitoExtension; +import com.fasterxml.jackson.databind.DeserializationFeature; import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.collect.Sets; import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.schema.oaf.Relation; +import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; import eu.dnetlib.pace.util.MapDocumentUtil; @@ -105,57 +111,27 @@ public class SparkDedupTest implements Serializable { lenient() .when(isLookUpService.getResourceProfileByQuery(Mockito.contains(testActionSetId))) - .thenReturn( - IOUtils - .toString( - SparkDedupTest.class - .getResourceAsStream( - "/eu/dnetlib/dhp/dedup/profiles/mock_orchestrator.xml"))); + .thenReturn(classPathResourceAsString("/eu/dnetlib/dhp/dedup/profiles/mock_orchestrator.xml")); lenient() .when(isLookUpService.getResourceProfileByQuery(Mockito.contains("organization"))) - .thenReturn( - IOUtils - .toString( - SparkDedupTest.class - .getResourceAsStream( - "/eu/dnetlib/dhp/dedup/conf/org.curr.conf.json"))); + .thenReturn(classPathResourceAsString("/eu/dnetlib/dhp/dedup/conf/org.curr.conf.json")); lenient() .when(isLookUpService.getResourceProfileByQuery(Mockito.contains("publication"))) - .thenReturn( - IOUtils - .toString( - SparkDedupTest.class - .getResourceAsStream( - 
"/eu/dnetlib/dhp/dedup/conf/pub.curr.conf.json"))); + .thenReturn(classPathResourceAsString("/eu/dnetlib/dhp/dedup/conf/pub.curr.conf.json")); lenient() .when(isLookUpService.getResourceProfileByQuery(Mockito.contains("software"))) - .thenReturn( - IOUtils - .toString( - SparkDedupTest.class - .getResourceAsStream( - "/eu/dnetlib/dhp/dedup/conf/sw.curr.conf.json"))); + .thenReturn(classPathResourceAsString("/eu/dnetlib/dhp/dedup/conf/sw.curr.conf.json")); lenient() .when(isLookUpService.getResourceProfileByQuery(Mockito.contains("dataset"))) - .thenReturn( - IOUtils - .toString( - SparkDedupTest.class - .getResourceAsStream( - "/eu/dnetlib/dhp/dedup/conf/ds.curr.conf.json"))); + .thenReturn(classPathResourceAsString("/eu/dnetlib/dhp/dedup/conf/ds.curr.conf.json")); lenient() .when(isLookUpService.getResourceProfileByQuery(Mockito.contains("otherresearchproduct"))) - .thenReturn( - IOUtils - .toString( - SparkDedupTest.class - .getResourceAsStream( - "/eu/dnetlib/dhp/dedup/conf/orp.curr.conf.json"))); + .thenReturn(classPathResourceAsString("/eu/dnetlib/dhp/dedup/conf/orp.curr.conf.json")); } @Test @@ -163,11 +139,7 @@ public class SparkDedupTest implements Serializable { void createSimRelsTest() throws Exception { ArgumentApplicationParser parser = new ArgumentApplicationParser( - IOUtils - .toString( - SparkCreateSimRels.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/dedup/createSimRels_parameters.json"))); + classPathResourceAsString("/eu/dnetlib/dhp/oa/dedup/createSimRels_parameters.json")); parser .parseArgument( @@ -207,7 +179,7 @@ public class SparkDedupTest implements Serializable { .count(); assertEquals(3076, orgs_simrel); - assertEquals(7040, pubs_simrel); + assertEquals(7046, pubs_simrel); assertEquals(336, sw_simrel); assertEquals(442, ds_simrel); assertEquals(6784, orp_simrel); @@ -223,11 +195,7 @@ public class SparkDedupTest implements Serializable { void whitelistSimRelsTest() throws Exception { ArgumentApplicationParser parser = new 
ArgumentApplicationParser( - IOUtils - .toString( - SparkWhitelistSimRels.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/dedup/whitelistSimRels_parameters.json"))); + classPathResourceAsString("/eu/dnetlib/dhp/oa/dedup/whitelistSimRels_parameters.json")); parser .parseArgument( @@ -264,7 +232,7 @@ public class SparkDedupTest implements Serializable { // entities simrels supposed to be equal to the number of previous step (no rels in whitelist) assertEquals(3076, orgs_simrel); - assertEquals(7040, pubs_simrel); + assertEquals(7046, pubs_simrel); assertEquals(442, ds_simrel); assertEquals(6784, orp_simrel); // System.out.println("orgs_simrel = " + orgs_simrel); @@ -306,11 +274,7 @@ public class SparkDedupTest implements Serializable { void cutMergeRelsTest() throws Exception { ArgumentApplicationParser parser = new ArgumentApplicationParser( - IOUtils - .toString( - SparkCreateMergeRels.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/dedup/createCC_parameters.json"))); + classPathResourceAsString("/eu/dnetlib/dhp/oa/dedup/createCC_parameters.json")); parser .parseArgument( @@ -402,11 +366,7 @@ public class SparkDedupTest implements Serializable { void createMergeRelsTest() throws Exception { ArgumentApplicationParser parser = new ArgumentApplicationParser( - IOUtils - .toString( - SparkCreateMergeRels.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/dedup/createCC_parameters.json"))); + classPathResourceAsString("/eu/dnetlib/dhp/oa/dedup/createCC_parameters.json")); parser .parseArgument( @@ -427,10 +387,10 @@ public class SparkDedupTest implements Serializable { .read() .load(testOutputBasePath + "/" + testActionSetId + "/organization_mergerel") .count(); - long pubs_mergerel = spark + final Dataset pubs = spark .read() .load(testOutputBasePath + "/" + testActionSetId + "/publication_mergerel") - .count(); + .as(Encoders.bean(Relation.class)); long sw_mergerel = spark .read() .load(testOutputBasePath + "/" + testActionSetId + "/software_mergerel") @@ 
-445,8 +405,35 @@ public class SparkDedupTest implements Serializable { .load(testOutputBasePath + "/" + testActionSetId + "/otherresearchproduct_mergerel") .count(); + final List merges = pubs + .filter("source == '50|doi_dedup___::d5021b53204e4fdeab6ff5d5bc468032'") + .collectAsList(); + assertEquals(3, merges.size()); + Set dups = Sets + .newHashSet( + "50|doi_________::3b1d0d8e8f930826665df9d6b82fbb73", + "50|doi_________::d5021b53204e4fdeab6ff5d5bc468032", + "50|arXiv_______::c93aeb433eb90ed7a86e29be00791b7c"); + merges.forEach(r -> { + assertEquals(ModelConstants.RESULT_RESULT, r.getRelType()); + assertEquals(ModelConstants.DEDUP, r.getSubRelType()); + assertEquals(ModelConstants.MERGES, r.getRelClass()); + assertTrue(dups.contains(r.getTarget())); + }); + + final List mergedIn = pubs + .filter("target == '50|doi_dedup___::d5021b53204e4fdeab6ff5d5bc468032'") + .collectAsList(); + assertEquals(3, mergedIn.size()); + mergedIn.forEach(r -> { + assertEquals(ModelConstants.RESULT_RESULT, r.getRelType()); + assertEquals(ModelConstants.DEDUP, r.getSubRelType()); + assertEquals(ModelConstants.IS_MERGED_IN, r.getRelClass()); + assertTrue(dups.contains(r.getSource())); + }); + assertEquals(1268, orgs_mergerel); - assertEquals(1444, pubs_mergerel); + assertEquals(1450, pubs.count()); assertEquals(286, sw_mergerel); assertEquals(472, ds_mergerel); assertEquals(738, orp_mergerel); @@ -463,11 +450,7 @@ public class SparkDedupTest implements Serializable { void createDedupRecordTest() throws Exception { ArgumentApplicationParser parser = new ArgumentApplicationParser( - IOUtils - .toString( - SparkCreateDedupRecord.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/dedup/createDedupRecord_parameters.json"))); + classPathResourceAsString("/eu/dnetlib/dhp/oa/dedup/createDedupRecord_parameters.json")); parser .parseArgument( new String[] { @@ -483,12 +466,18 @@ public class SparkDedupTest implements Serializable { new SparkCreateDedupRecord(parser, 
spark).run(isLookUpService); + final ObjectMapper mapper = new ObjectMapper() + .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); + + final Dataset pubs = spark + .read() + .textFile(testOutputBasePath + "/" + testActionSetId + "/publication_deduprecord") + .map( + (MapFunction) value -> mapper.readValue(value, Publication.class), + Encoders.bean(Publication.class)); long orgs_deduprecord = jsc .textFile(testOutputBasePath + "/" + testActionSetId + "/organization_deduprecord") .count(); - long pubs_deduprecord = jsc - .textFile(testOutputBasePath + "/" + testActionSetId + "/publication_deduprecord") - .count(); long sw_deduprecord = jsc .textFile(testOutputBasePath + "/" + testActionSetId + "/software_deduprecord") .count(); @@ -499,11 +488,13 @@ public class SparkDedupTest implements Serializable { .count(); assertEquals(86, orgs_deduprecord); - assertEquals(67, pubs_deduprecord); + assertEquals(68, pubs.count()); assertEquals(49, sw_deduprecord); assertEquals(97, ds_deduprecord); assertEquals(92, orp_deduprecord); + verifyRoot_1(mapper, pubs); + // System.out.println("orgs_deduprecord = " + orgs_deduprecord); // System.out.println("pubs_deduprecord = " + pubs_deduprecord); // System.out.println("sw_deduprecord = " + sw_deduprecord); @@ -511,16 +502,63 @@ public class SparkDedupTest implements Serializable { // System.out.println("orp_deduprecord = " + orp_deduprecord); } + private static void verifyRoot_1(ObjectMapper mapper, Dataset pubs) { + Publication root = pubs + .filter("id = '50|doi_dedup___::d5021b53204e4fdeab6ff5d5bc468032'") + .first(); + assertNotNull(root); + + final Dataset publication = spark + .read() + .textFile(DedupUtility.createEntityPath(testGraphBasePath, "publication")); + + Publication crossref_duplicate = publication + .map( + (MapFunction) value -> mapper.readValue(value, Publication.class), + Encoders.bean(Publication.class)) + .filter("id = '50|doi_________::d5021b53204e4fdeab6ff5d5bc468032'") + .collectAsList() + 
.get(0); + + assertEquals(crossref_duplicate.getJournal().getName(), root.getJournal().getName()); + assertEquals(crossref_duplicate.getJournal().getIssnPrinted(), root.getJournal().getIssnPrinted()); + assertEquals(crossref_duplicate.getPublisher().getValue(), root.getPublisher().getValue()); + + Set rootPids = root + .getPid() + .stream() + .map(StructuredProperty::getValue) + .collect(Collectors.toCollection(HashSet::new)); + Set dupPids = crossref_duplicate + .getPid() + .stream() + .map(StructuredProperty::getValue) + .collect(Collectors.toCollection(HashSet::new)); + + assertFalse(Sets.intersection(rootPids, dupPids).isEmpty()); + assertTrue(rootPids.contains("10.1109/jstqe.2022.3205716")); + + Optional instance_cr = root + .getInstance() + .stream() + .filter(i -> i.getCollectedfrom().getValue().equals("Crossref")) + .findFirst(); + assertTrue(instance_cr.isPresent()); + assertEquals("OPEN", instance_cr.get().getAccessright().getClassid()); + assertEquals("Open Access", instance_cr.get().getAccessright().getClassname()); + assertEquals(OpenAccessRoute.hybrid, instance_cr.get().getAccessright().getOpenAccessRoute()); + assertEquals( + "IEEE Journal of Selected Topics in Quantum Electronics", instance_cr.get().getHostedby().getValue()); + assertEquals("0001", instance_cr.get().getInstancetype().getClassid()); + assertEquals("Article", instance_cr.get().getInstancetype().getClassname()); + } + @Test @Order(6) void updateEntityTest() throws Exception { ArgumentApplicationParser parser = new ArgumentApplicationParser( - IOUtils - .toString( - SparkUpdateEntity.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/dedup/updateEntity_parameters.json"))); + classPathResourceAsString("/eu/dnetlib/dhp/oa/dedup/updateEntity_parameters.json")); parser .parseArgument( new String[] { @@ -587,7 +625,7 @@ public class SparkDedupTest implements Serializable { .distinct() .count(); - assertEquals(898, publications); + assertEquals(902, publications); assertEquals(839, 
organizations); assertEquals(100, projects); assertEquals(100, datasource); @@ -640,11 +678,7 @@ public class SparkDedupTest implements Serializable { void propagateRelationTest() throws Exception { ArgumentApplicationParser parser = new ArgumentApplicationParser( - IOUtils - .toString( - SparkPropagateRelation.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/dedup/propagateRelation_parameters.json"))); + classPathResourceAsString("/eu/dnetlib/dhp/oa/dedup/propagateRelation_parameters.json")); parser .parseArgument( new String[] { @@ -714,4 +748,12 @@ public class SparkDedupTest implements Serializable { public boolean isDeletedByInference(String s) { return s.contains("\"deletedbyinference\":true"); } + + private static String classPathResourceAsString(String path) throws IOException { + return IOUtils + .toString( + SparkDedupTest.class + .getResourceAsStream(path)); + } + } diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/entities/publication/publication.gz b/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/entities/publication/publication.gz index cf513d7302bda81e6bdae0dbb430f5852a91307f..a7457063bcbe59481e11c944d4dff7a3244e4831 100644 GIT binary patch delta 24067 zcmXVXWk8h8_qO!XDM)upcPt>?v2=HLO3KpRU4nFnG$>sQNSAbX2ut_N^ZUPF&o!Us z-1p46VvZ`POZ%t+RiFaTm00I3-OfDHvN`7Q3Z zxLP`1*KK~sv>u-;zzg;|o~tm7-;pCRZr!t3I89Oi^#!NF1XRq+rp)a;4h9n!Td4Ep zBwL0N-OTC5v&^F7pu@%;_;kQbGtV~%9<9#u%tA@IT!fVdQPTbB+omcCDC@gdQfibF!Z^^sm-qq!T~u&nM&<)Mz$Kjyg$;w;)CFh(N!5LOd8uCf|GS2)37m5wp+!NtIjQZPd<6sg!Eb>Ziu3LV9=oOh-vy!G==P&1t z{$gfou3t6cv895_QQ%3+l-bkXMhUuMEn%4(gB8z;-tsN`^kKO3DOouz_@GN721|UyV9yds)WC+Af=H06kBVZQ>NPfYmfg=;#;HiqO zv`@cbF|UvksPcf3G%FT>U!IQ+?2#ipfAXkuQx~)B_vzAI=ogyse+^@wk?`Q5{r1O zU>~CUEUwaMi@^SkU6mY|f%M57M}6BgCUk#`lLT>Lesk_b8q|05W~JH2F>a>Qb6jhP zUVFQ>$`SMI2kLXgl9vS(pC@(59Cv|~_4(#i%34hS6h9(}E{ico|JginfhoRB8X9gA zExyF8t8H1BKI5Hl5nC0iqd#|Ca{$e=49!aqyd2E5DT1^AJm!+1`%wAgCz&rHc^L!* 
z{7L>I3vX)>yPo{FH3nyewz*DsziHd!(tN8ig!YzaOpU%XXjg;|panB5O$*@7S_QLS zYvg@a03q9a7Nxw~;68ujK5oPcU+VV3li6h$^G@Z;lJVvU{|;ZxS5NcJMB>vJXV8UC z<*12HvJ?1yqa*m`_BuS$tZUj9AMwhllX`Ck1qc1z;J;^B?c%35g0B|JE*Zkp#;tdG zVXDeW|7zk><<&$v+^ASpky?3^-Ee2v6U|mQ-=OvL*(M;e=GC-&IW)_0oVm1G+#rO| zrudJ}wr*9{^$AH&=x|1p^KN~$W7j|!%(09$XaqOW^d&ZZnSqsykrr?qhx?+L)3*?x zJ?RLa&njLB8x_q&Vo3!Gk{qPO`j|Zps%bH`t)pk6i-_zBFQ4)VX(4Hr9O9#5>VjIZ zM@p`#so7jo%I zRym@{5~InI()BB)*|~OSo!FI@9i7uexokx8b9iUx?0z@-=g|H%%zCoad*O~UALtE_poTaIVL|a zK~^mlb}kK;)MEiCq^4}l%8cG-p#<17LK1f5MFk(MQo?ei5v z!(}Iqou+UVNDN)Q-W9@ye zo^0?hU80Wce={4?uT7dTBGP~i{PFjjPfu?JQ5U?VqYoILKG7}Mr|OIBfE0R$JbNGM z@h?rJC0vFYxXBMfKH>!HS!R?dg#mTNkzwQks^U}3Wi?s3Dw>Pae+bk{$=Jl_KU5;J z3y^EVIoA^1BQtouaDP*G1m|=#BD@4X*u8w2R?2}OMq@{1-DgH6WZm;vGrzY#9d<67 z_n=R{_%TQF#!IH&V+5y@;|)Z!^Mud{rtDKiE3i*qAU_RC!JO^;F{aN1F3*UE-DMU9bbaOS-(x*@EGD9CD6R-H@&L(|{wq{Zj9M_oVY@cn7_=m$V z#)qkhj0;mx%wKc~Cq)oqe->889F4Zy@eQ^ul# z##GE55=xB|FU!w6W^0cVpIxOkTQJlZy0arC%TKkX6IO&yLz3j;57&2TZEC|KC$n~R z=(I9*&5{^@dy%kM);V>;%qR)s-Cd+esK+8bdocGpSCmLyl)MbD3>PMs)2rGM zwcgevI2t9~78f1A-?rJE7@Tp8{fPC!Z(Pkg`iT1>`Pb7R*ogwWnXguy%C5ESh^+UK z4x*b^=J1L7E_8MFYg<{>Ny?a1lPkO*(e8I%vhQq*qdX*)9hNWsCCzRJaK^=XiN2NM z@fd0#H+O=fnSHgcG)Q)_65opRTXx13x$2QJyKl>-gH{3fpcnZ$1XCbbFe<`1{9b+m|nwH!HvqYEF{} zh6Kbya1=7BlGKhpLXJC#cKckZZ>`Fl97tRSi>Nv9mm#B-?9S&>n!+xi=Yce`iVtkJ zL|N;^L6jLz$Uk@PGj~a{wk6D?+LJVYhe&5b1ahK#hr2q^nE{ii{Ke~p9G4sk97tTEi2CF4175oB5c z#z==Q3Go{ibvy%v=e<=x*K_OMqhD0cqRQB1%6PN9)9KAVX>S#ya(o)dxrz=G4olUF zCa_{TR`Y9*s-q%|y3cboF$6F9W}puS@6|E4CmDN(m^DV`v<}9}^anLZxtSS!Dm_f8brH;jr>5UvH^q!6W2C z`{OA-)aaRbIzCk6rc_qK3v?36=!uRVs6|0TtMe;6o= zq@0N77ur5WcVEs98&Dh%CwO{5pPrS9Y^44_04d|Nc!_k@Nyr0rc#wF3H$J9Suf^`! 
z&ph+xkQMvlH!~Gn(rP2V0+`H#8F!x$_5ua=0?=bVI?Jn`XVv$o-5UqymJa4LrICkK zQX_X+tZzHN3rPMdc)l@A<@lXLheLd*a9eLY`&ljrNVen3ZK$-nL>kh>zg;Axe2%Km zI2i(@qS~xhnxGNMIfyc0j~v=b%~(vFWhZN3P%y27(Ow%e`%7mi0wx;VWgfj^#-&t; z(34>xJQzGnK`Hu3UZp8zKm#K_{uz+THCal_qpLz*BB0b=wf?guQ<)2FYgKa+@n1r| zz!5zN4tE&sVMWW9+9WsHq-p7jx{u1DUrQNrM;vqS>reRXaBeZf?9Jfbuf9FeMPDjc z#G{Ts6E`4!%o${g1wVg)!a*>($8GJQaN$Z7=IUp<0h^apyGV)T<-w9f|AD^|M5hc$ zMdk%7XItoq9v3+fZh13de_4Y1mopjfeLcQ@m9NeJ`s+w7JaCyV8JKCPI%MVm~62t9o->Ays<{{4BQLH&KE!9dpG$g-Ub zh=#vic^mi)N(MN|lPOV%BrJcE+E$@T(4;X&x+}j7XyLHzkD-fZYslK#bd3}DB85Egc0+B_v}-a}{JAR< zXQupf^1VOFpnmrsdvNi|xgB#9$t<*&k+OH=>hIg6$biVI>KwUr7uo;B!HR@%?`VBq_7 z0FL;JcPe$NLp(@kT%E;+c~}f(S_Q+Gc|EOBX>xykZ;bM|cQ(g4pEM<$pk>eM=Mz>S zQv-X9r67sa&bWr_Q|{)XkXw^+Qb;7j%`xX4F)(wRB4Pnu^tfmOmcTl428cGA>E(uxL9Yz$i&Ed_AV2{D5*pE ziD-bhvDnxcy4izEZ8{l`gNp#dU;DWU*I-<5O3GcO#c>ss3gTma@y7P>_2NG zqso?w3jaou>ap2mfqNQF!V^5?1u%@AY z>r3J1#4l^qu$t29ko<*RmA5M6HSYXYZ7hb-nCB7jjeA(n23w-R1KO{7!)bB!bTA$> zWAU%QecNAsJ|vY$g2+iMuz|AzZu(E5$1a^V?C?BwQ0qV2=ETs z10cyjQkg1mHSV*4YL+0#O{`QOtTfSpk&V`%VG*Lmf&@V{{hAKtB))d&si@wMu-g7R zssh31iDMC@lGvi>K>hd0*=MkVDK8SPSDh?5^v*;}X^1qr&%Nh6yGke(G44e+roD!| z4jxuyf{AN<^j5sZl+#S>=wmcjr`ve4qBOD*38rkWY-=no?6CpeO*i+~_}f&{d&F)PVCeB>%~q|Cy+1fSxx~q270G>!L*1a&YT{;F9J$ zP4I?t_je@(CZ-A}MMHyBqJ-Wb&)TK%X^02^Bx{8oYLB>Dj?3?eQitY#h@)^_oqhXA zW!^~2a^;-Pr+r}YFk_Tu32vTRCL9}+aH~A~*ld--r$hboWO}#Q=FBW-+q7#?pVw?U znzbfrk>_W5V$}Bs{3<0D=VEE9MaMk)7y(tboZ6($o3w~R&)x>K*_^~f@xm%wW&PIW zAC`jgH)``4M^X?+%GR)X#H1Bpd#96wex@6HOgrBdtu+=Gqx~v@I&fS?ElSMIfT<>+ zI%K~t@ps=$FzOWnnV1F(Y1>zzn6ZG(G4e}^DnhCO%D>N;~ zlJi)Cd$Y)yXqPyuD4aPc@PjWEGhfYN-cXx$Pjow4Sj5LIN-z(pI#>CLU9Fx*u@S2{ zFj+i>Y3wT+2iw*5E_T+EY>c-@fUm!}mE$;SXZ+KJo@P$Khm@0;`g1zuw(ckKrHY=Z7b{CodpwCYLN@$*z9MiqovhY|SxnOUM!K=`cGpc&E> z{_0}(i75TIn63SE7H*Bhs8*!aQvLGIo8Jw*ucy0z2P8f%e`f2H)LFNss@Y*>VeF{3iwau0DLu1o&p46B*R{D3369uG9G92dq13kc&%PE^D-{e>%v>FyDZ+Ylxbx0MWyq zXsTV1u8M(ikC17DLvuLzGYB)^Vw|t)Tu4_CS(9tCvLciH_smo$u{+z&u7#)q 
zVipCSQ7~5rA9i5gt{iZinQ9S1T&u#%%^-GOjo6L=&P$9`FST6ShhbF0F*QO2Rwk~} zv7xce*YW7koS_0(msm*}U#pQmf!6W%Zx2K-*)e6AG8vG(aPjcC@8bMj8S6=)DQo0o zoPy^rI0mq9sS_lhwmYUbKyQiDjr)BfM7AL}4E%j!R~0n3f3B0`M^plhqf54?Q<2od zLGqcNwt!IT#7YV(j)a(4Am=BK(hy?z)F)yR0QKbd#5TF~(E2#3RNxjF6h__@9Nfmb z3rD@InT*W~wByP9d2uq5yIF>Rp`<4)6(QS2;Am`cq8YdH^f&qbDI}YB$S3*}JgCFc z0p9*XTmDpuJ%bsqxuK$BdsrcSiC048YsUt(R10~h@&lT5zIH0Wu!G$>R#`9BA?9T7 zA8MdwhXB2axmo-O z{=k(}X_QR4yTU0Yp-@&!dnL06_DQPay8ga0v7+-vH;V{wK^uG2YIP9rI%9m1qQ*(r zV>0ndYywFjKU~03X<}GIX|{#o8~CgHt2efr2;@NnTP!*k(@g|v=pmETuCO%`@Wloa*dBa;0EmBa7`v{F!L$PSUb(dK>RVSSe>* zj?dExfboaFsJf32`yc`fzIXnx`SyHfVaUI6cIdk@Zd88D71RPNpZ%{ORPBK%T2G#8^Uh9{ChW=|7{T{2h{*1q!->)|dNW-!MePHDJ}$4MM`*+9~oLN-eO zhjqvdl-#?@-^V(1(Md$cgpSZeP1we|>wHkakZUy9XD&3)-pSNhl^WE<0z~A}CMoiT zK{DtI!G|m!n&t*!xcYG4)v5(?geCI1hv>K$3DW-QnmuF-B#E<1rkk|<@EX%l z5U?{{EG{o`g%j`2O15q1N5I@}?Vy>$b4?N^6$V@ulS7ghte-P$%x$r%kV4MazYcT9 zde{DRkwyhCyjfB`Nn9$Vl>U9vTTzQ!L8zD4C6ecB}+b?k(!VGc;m-u5g?YHdz z<6+Z2bhbY~?(9W8k5WNJlxO{O9j5|y`@yaONFe}C`EoV!G_`kU&q)~U)6c6IgD;Zv z-+Yhs(`I;N?N=HF{b-tMt%hZzW9sJugQNOg`MIM*tCiA6{9SpX<22Q}x!+E@!u>@? 
z$T#w%_M;=(eG_S7rE00!`9@KCrB=3C5H`MKhD@qywdp1_7}`zBJiOtqi7jUxMJv=0 zxT?`2v{J&A%mYhyB2Tc=ke?P7dkk@{lW%q%m(t$cj?wf-hL9!d(L6E=%kKQV8Sd!3 zWlWVVp=Q23xBs@B?)H~)t%_5C=$qSP&}u-t&)eSfF7zIrbdCh+TW3eR@B?4FaL2QE ze7tR@xpIO^$6?C;{q8PwcYi0p=KN;l@@zZ<)kDeMz&+qb27Ge63#*kwtz)AMc{mz8 zVi|=Tk3vL#I&ZjXYJX?I!Ol<8Tx;4>A&wdm`Sx@hLR=yj7FcjYtHKj%o=?GGxkXpz z)5(BJ-EGhUk`Gg3N>zK8+D`Iz_Od&u8{<-Hs^7XE-Q0aVlxqX%>qRN%UwUuKM1PeHubDy8 zaX#A-pugK3{fx)WilJIoW67eV)eEC8HFC~BR_y`=zO;WFtx3(;)5;n6!BAnBTzuSA ze$S0iSX_D>8-Y2s|1Oi4k-BAJ`oMOhkGSUd5)pqa0^?h1pfg?;3!$!kO&jqwpu86{ z)y;EJd)`ZzU_;Q<_T#=hRrM?ZCFoTSt$pzlu6@cT^bV>zs&poh_^7GfDGSf#UR&xr) z1ojtVaL=`<{ejM%17cZ`t>fDA&=T5hm6)Gh=GQS7bVu{4S7z)itdO8`lPi@Te}uF$ zq_`1kif*%QnJ)p@iT_v@R?k`3+P|>}`qxl+ur5n}>5>b4&Zi_`JbittNBVHk5II1N zEfu!!l53f34e0*I6hk=eDY=*vT6+>+s0y;M`bnFuv0un7;WYv8ZH6 z-O;7)@e*vAMF9pR(0c)DsGGV-ZC;KacilKtKgppWncgVL9E=@%>GK(cbWHXhs}N9D z$JhIQG<3z*lF@m&F!Ag^wy|3AX#Ek^dUVo!(P7(MUi;=}uJgjH0*&<0#T&y|`v3EQ7_=B7S3WMOfvk{VUF02p(x4gL~BWB#ER_SySILf0ER@10aV%0~TQS z`XCn%_xVynsB)>mS6_RLWr|3UEH1oB>Z$`l9qu-;#m3F9hpv}KjX0F{eC@!x69E};2G1nivy(JN#uJBV|Ajb zLyaJDi*0@FQ==ect(#)_>XFcavE(T7s0^uX%UPKvYctCb8=R;#I_%HnJ_E>a9P$<# zI5O~gRrKO{>7mhWaK;J~U`q(4QIsS~4ALqls?4|2PUoK=9;wn(Omdsepq&_@J;%_6PdMd{Bxb2}hnUxkdmb8`XRBLFrxw$!K~ z-+4?O17Z1R!o%okhP)Is|L_`c0q&YC5Bq;i76WHc7SBAacc0(EFj=P+Ek*6bN5|P0 zJ9$7BN4p&sRn81d6SWaY^ZGzxr3B6FAYF5?k8eSM-rvaF<9!d8^MRoMv+9y@YU7ae ziL0S)6AO&`?V94fGnr7|zzov{J0a;tK=Kg#1V^@8gumJa(4}f{t)ijSr;Lc1iOs+X z_%kJu?uE(AI@$P}j?b6qOtmsI0Hq`e9GyFi@M6*g{BI!Sm>I zJUUYW5g8aiIm=YnC)>h@7poswL-Cpx%dVIAz#*(~_`?)jc@p_tr-O*~;EV>{IIaG9 zW3D3kmJ|94jvj>bgMV2PM%_|6ASAqa^`_`ahkNwYx^-fm$uzxL z7rm@P6=P0&2nb=aN^B(UN&Rp zaB+t5gyHcfl8O1kA0=4{I12kpE_y1>vm66krM|sOz1YPvm2H_J*MJ%7h%X6;Rx(A7 zD@(9KX1}vZr#&Q`v$7TVtYkzohAbZ;Bbd5{vPO&wWS+W^s#6HAfY}TaA5^*P8A&NI z6HEl9{=5LQD}(=l-355N+$-Pcy`XM3n`z&~eMqvpq~4IjPr<{-+Kwr%1|u28Zf|9x z72f-azP^&nMp94)heAXx7ok8(vm=obql90=`yF}2?2`xSXZphZtvoc6x9C=j6+uXN z^FvEj#MN_;J79CqGp;9qN7;LQ|F!k`(N-KF6Ug(FaPDd^PA zc`$F@W^4AIVLoLNk4rb=)|DfFMdPeRC`k>xL{rXNm(FK 
z6y++c1Wkap?DJ&ZW;xJI0_kR7>__v(hm=Wtq+9^aZMz%ix5t-1#JpJ_dnUP|;^I_Tkz3qpj9xS+^QXeU&fGc3q zKV|0d3;ZB`GM!surnuvLd;F*f|I*W=IINJ~ecevn7?iBPV!~L*LGSv<+rE~D1!gIhp+jzd!S8#a%hm!DN1;!a@xI|dJwcXxrgOGSnI?$LK()YtJit( z_9kieXz<}!or7pYbSwzXb$s1XcM8ZZYK2fc-O_rw6~!;C`Wk>+#rI<@GO{RrXMWr@ zr&vy2igpjR?@@+IH*I%jBAq`x4*FbZp(}ToPQxi9w*%_gzAkxoE)8;-n9xED6u}aZ z@|HvvO9WWiHeMZ(s-3L`{V*%s2k~_T8W|hWZ&sC&DkS2e-oKjKr^Yq9qxwTVdj~Hs zTCFP5F0I2ZZPEctW^YMyHi31(M0H@Ectn#9@uOO{SSQM{rfS{H`@U3Zb7&g2mcIbg z*xy^qRIXhC&EWkHgd+N;~U1?Pk?fl(ShGMMOe^)kjBExq)+DM zSnlAn$I-wAIyT^s9!;lX)SoD$L64%QYDiW1gziN2s;$OXZYK5-wNfXWGwf7#u#X$O zF~uEX`=@oS2SloX?vOboymA*^`{m;pnJqO#^HFaS_0tiPZq3dVaNdMSAygvW%9Z<% zspdd%OPmKrN3j4a#I;SiXy%inGNnm zKCG0Ia<%8xYYZ{2v3wD{6x}x7r#^c(6(ut0M`*Tz1k}u;~{v2f=6JwlzW_xJ_wfb9NTxm z@-1vDm)kC!6#&G=dc)`z?*<_!-y!MHS$n}2rpU1cP;U1)}ZJoCq-zZi-&V8Nh;bj?xHJooFWdH65j{NnH_`FsEmc~$)0M@*pn>M3rjjii-;Is4u6h2pNJ4|iDgWoV zZ@SZ&uWsfEw|GzlWOy2^#4k3PvkNp>4%%FXae9+DfcLCNP$0bcM^HoJmoKneiOBFu z|I_(aS=a%HoB%L{g1b&%Yw`|4!ORKB5g8RtqAVXucNe4KgjX^HC>K(Y)ze^6jltu< z$ih=#PYqO0xz^O$4^7}Ht4wj)!zkNS$h++fs;yXM=T4fvuT>;9rvGWY+V*F zN8q5WcHj&(4$4+aF)|dZ-?7JRL^hoslT$a?Ll%t>>DO-LYX9&~Av_tsJH>g7v($%f z!T~suKS&iOG?vU`+X2k5fAzR5biNMOmk({PUVh?CY#XR=B{5@!RGd!#ZcYVB zBAUChH6gAos|qLmt9X~BzQm_26*5lZ#sv0^A&L{u6lW4@lvoWOI3|d=2xzA^x|6ApLau#-ZNLLy`8BRryJVO0fR*yMF^iUJ)E>UgC>E5J@)n2 zh;}Y)|HeLVZS}%`3G2`TKtkj*}z;s zkM{DJ%J(0*_)XV{qTUN}YC*|| zTQGZxH_mZt;k?S$zEc0ILZe6XVTscHiNfo5e^%nek>bS{SEgq(gzxpsO`jT3f8Qby z<2t$x2trpN&N&e0?jGkWsoQWidPfX;KDUr^+=qX3mm6hCpv)?oXO!nL%{G3iaCZ&e zc|i6p79eYX@o;l@z1f-^D~g#l_tz%c&DyBzG6>-B5bo@dY~_Zqlx1`quNmM4h_!ML zv#g1-u_`+`ze?tFkI=8Jq@Z5Qx&xuZ^W%g}@Fz_0&f8#i6RyXel+E~>a)7!7xz4h> ztgWRH`qz;Z!k_C$#t(p>K3^k0!O&l9Dy7_StmmwEVTxY`dkp>R6SpE$tXUYyN z0Nfq+yL*Y|B}|DVTJr|^Y#j+^evZmSdiAR_Blb^>^IF}}?7O>nX|c-R zBlIhipAAU*LAIN|Qg7{^Wbh7*7EzT=p9_CwEf%a0Y}rWZ(o}Y>YM820;&L%eNe1m} z!DnkP(r2I=y76pdM{{i`=T@A=+v)RqmRycVfqN6aV#R#TUG4M>zhRV=E1xx3KSN@e zbp@40364ul*=E~CbU2O|)GgW)Jyv;UXPdUOEb{Oz+NSp@;^Sm)BD^WD6Hqs$+H?$d 
zsLFGriYjtvH{EZU%x9HUqTOUE&;`s{^GfHpHwO9|I_@|8xO#wGhwx_ZWD@J`shl zHf|IOUn}kCM5und8CI=oF>h8J`^h`>C^|?LW6FnpfdZ+^`uyd!f9M{2+n8g6dLTgt z4R2Vbj$D#Tb%W-|t1XXB*zbJgnSB35SF%2Z@q!y5F~BFh38Kc@LjAIHkb() z-rY<+9S7LCPrb6~2zSSv)&)@z^!?)^qk5Mi1^bTw3kH_ND~+?G*J#Le`D6DT1FE!5 z@uUC4B0f}|=*heiq?UGP8%93uoFdaA6+kV4&pu zM-9NV!FY8y_`Eb1rI=mNmyUq9ccLq7KNliJdvk9{7pTC<5-top|lv&4OT!FN-%>(2BBcI)C!(1Eq0kSY2mWB1}=XZ~eG& z&oEl8!*`H`=WnuX+d7U^Q$WV828+cV5p6GBTeox$^sgfII2KRGjQ`8XH%zPhZ86Gw zUe6hMkIrcngELNM##({LsJCbOwg1bTRTRd_P%)8oU|$+!vf&w+UTRW?wRazk5k|9< zYY=x@N~G7PS<5a$$UiGG?T);M+YC8kt&`{Dag(g5%*Jx6L2W<(?*eBr8c&ak)mzJD zbFP;DIrLeVw1JyrV}dY0w-xGbj!Db<8x%AI?#mWOA--bKB-UO35~mkyZPpgFMKq89 z3mAW{h9~%A3MLHWzg|9sgiC1JrFtxk5fuDa)AUu4D1bvDT^B#Rz^%W{PDZNq()i_= zGju|WsV_^T!O~cwr}eYuuk2;wjX-rB^Jf1~=5vo;s;|Q_aDoYifUkAC!TsU%9bk2c zAK$#9pL3~1l{yO7tj8_tNc5&`%_hmZC~EIHcNyq%Ny?g zeWpf#g}n~Tk5629S*N%@Wd$Cl&cHpA+%h76ssj3L7<0ysVh}5#{3n!eGbH)NVQ6BZ zVI9I-->#M1{Zs0Zq6n|hzR!@9)kj{^+)9pe!KVLc>@&T@D99k%EuozJ6a$9;k3=CC z^gj*kz{puF4kHVe2}}^8h(^h!T1Rby!a?pK2JYOtF8(-?LV5V#63%U7{myaPVvDI# zb15E6I0AwIKPIW-NAEgD@9NZl{*o=*82_H^mRc>?V&DVvdv0E}PG1?{{*7nrXteJS z_e6?6K{ER4E&n84c^4%X22QowUwQg-uG90otGvZ~JeZQ&@RP7Eco1$8t6MZ%3sT+t z;Ky%HonHTxB@`$ShA@^>OBt50^QT~M28*c+bQu>hz&C7qLWc8CFWnyKCHrjm^;7dk z`OA>|Ht6Y_i_|&V^7IXFbE}K;?}tbC?Y-UAtg&f%~l^#Pm60lqnAL}1F2jxQrsrPI_2^WEZMeAn$|-r` zsdzEOQ==3IJ&Oib2QN`TKCu3Wfv9X^f5%suztpV4u|;h+0A+>GnyMP|gQ(|anp**C zsgd0Q{5;w)@yWaUlVAy;bWJ4w02V~teQZntU zte?FUbcXmCVY?PslQKZ^&u!de?>5_qI{QldP7m*cYbwc0?7?B&P3%EYemrA$!SIDr z4^)|XhXNN=9>pq4o7;Vc*R)SAE*Jy%t_W>)J+~jH)&*^*0>HjNOz~Uq56@$X-<&y+ zyIX-sZ-JF{)#T<>OdB*BUBn3k>?$J^7!l0DaiT{F=c-ArbS&gTc>xbw9}Iz?{M|MzxM!$SDfeLwL}hLXnV&=&j()7o@`LH;dHQAL~17 zbG96YYXOVIP;hJqQ}Z0A&UZ_4#Bq#@#5{cdJeBc0F{SmLRHC49mUB0i#YB`PKBir) zszz@9gJE2b&}I;jq)0n^=c6%A09ixm4`-r-93!-XFVdCMlpUpnwLv4*8CO{DKZ$o! 
zQh-r1B)kwtAo6Kx7X9uyvEdvEL4kGlan@DQ7-kLI8W`8j-_!M*o1d?ruV>V&Z9I5A z->7+=+*3uQN%{RPR2}Q9n2Igg&1l;56 z<_>*2+8Y@^k|Q9UB^mZ`c8+&a5h>Ir+Dn@q8>_05`Gm(e_IXkJpltZQlzWt(&$uI> z9j?R)5j^Ubeh*}hY&epkmxBj0iY_sbTAG0noYj>XracA|2{`x(5+!B?D)N^ zzB(1T8HHJfG5^i!4zb%ABd!AVo|uC~oE*n^P&^D9nXz9@dhd}q@B&U?hx!%ozV{|7 zYmp5Y?m&*C4a0zX{mfKZZ^#g`?Tz4%SC&~L-cg&$wO1va=< zBZTWp=5#XqrAt{Bx3qQ3MpuZaj|U*MtX}2!A@$^*PTQD8T%7bzy5e#0yA<%8k_R=i8?-J z##`EFJL%aaWkf%)KBfLMQ$5>^tbe)21DEhh#XFQ~F4yX&rf}&zO8$m%)ChSI47rFh zh+<&1>*++;yAhZ`;t&{%ppKkRq!14VjQ#n~<4%&S+!rL8uK-a-<+THlsc0fAWebAf zv2i5d{Da@gV3GN;qjsawB$jR+eI)mqZF{M?vmzF0++X3UdhzQhyh=6kWifZKI0R6W zd60zN_vkjdUp=MxM}WY9>SdPZ@jy^{J2uRf%*7^xmU@DV(?pI=mjYHnHuYy}lt$i3 zaPjzrEV23JH?YCEoI&57K1K>&vGc7B>k{d=op!4Pq1qvy**o+#3=*ZXAi`@MHuj{h zB|f#D)N6D%*c<+!m`i@bh|?8ttHHcx8NXLa!{8%4X2-V6J&}f(?XU74CiKt6+lpF$elqpaDH{TpesaD;zR(iLsCF&V7sq=xQy5rXcE+QXnBx7K#N^V#R;riv^R0 z43~$lin>pDh4*j3e?TQ5JxURQ^P_XG#&hVn_jobBomcrnOY9;!IL=&ms7fv-u00w> z>1S%mmv~r#=s`jvehzppsv;9WhPn-ZSS+1#`WYiT z0>Vo?A4Dco=?(*Pgn1y%kSeX5{Qlx~%|GjzmX%)NI#$mgt1NS4Xn3-OF+=Jn=Vj6| z`aUm-nN_lKc+#a2&a+$}__OQuBFt|VqPU|l#{Eg*w*KjzClWdE+6r5l2o>rF?pxg^hRsSzrzE?~be*&(QW()~xxlNjKFL;=X(QQ+Jsa7=wQk7@u9&) z7DnaH{}&Y<>f$WaQjmN0<3ULBC#zun4wdM%ZDZ0G$aNyuS>4Ak=Q=lil)dIMs{!Xr z;id8n-T7z->CUFnogd>Eo_;=mXKvY|qLA-A^y49a`sjZl)rnN+p}eF!qB?b|Iq%L- zj;Nyv=UpHLa=`l;EF1G zGwzl-`zqp)k!uDIk#Sk&k*Yo$4dMq`mXRV`f@}%0B@J`hnrw+}I-2T#QfoC^vOLs6 zo&zPl7zjA@C%+E_o8ubYIVt*!t_wL4rxS1+((MH{?lvfr*{8$Tx$iR;j-}}ChN1^iyZBk)PD6Nnt2hEw3TT7%&mMjZMnJiK!B3pPE zu{?M6VA8`@N1e(u^hq^;lCAuNWJ=S>lygQrxM>k*+-D@t zZ?L@%-bC1br?CAQtf#}`*kwgJu?!PTn(EBdX1O43;-jBfq7vrI3#w7FxL+dpx#gYRYUg64Nc80Yw9TM!TU9zU3I>qG3VvRo(wAF ze4)7yu*YQ4Of|-T%N1?8FSj+ZrjX0U@hwrFxu-f=b>5j;V~9`eDdgmIp|Ou~7|ajA zRsNziriiJ%Wez)w@@)RWj#jl*r<~=)o4)`QiUC>{+id$Jm{YmG^Q zXM~0A#aD{;C3aS4UZpkWh>1DON<*F%dDiARZ9Se<*Db?;wyftNtfz~RT}5`a3Zpy7 zt|Gg72<)n5=@s#{uM5E zgQB2jBGQ3>OezaF%lfj0CfF)|@jaH`eHZX)@gsZ_=Na)?5(xNJ#=ICdT?BRxoTa=& z7#ZdvEa!1QPBLgA55`EO?u(sRj%b@hXDW;t5!@$`g 
zcg-!$aRSYC0>kvFt8vS;UtAzOv$W!pLe20*6;UOxY|15`i zBKBhlD^ghXQglpn90PwpBIueXx&}TShqBKDPM8nvqi-7Ho8q;B=Zt&8OGKEWOscL- z>YVj6F(+TSE>Ha2NHI0Frx0mnp4mT5;{l8ASX!&O8j6TVgUj2iHnY5q8bXNMIC&_# zXnpH{H8c=sFd`GN>~PZ4;V_=_%$Z2i6~dSrLWqe$g_LGc41r<@6hk!2Y3qt1G)<$k z`8;z;=gXiL6h)vY0!0xhig*;x(pz*B9xl3N$-|@{Q15_x zhw26y4;Nh{dwf~=s5~c*s05^ypHM2%G^xaYb)4}i6J^oePZB%odD|KxY!C7g8OL48@{1l15&nfFlM+Fgkxbobq{)4fos<~T>FkJyJX zvpanR_o(GEPEDKknC+U59Rv<@w4TM(Lg|b8h<(yWP>N?+*5<5aR{k6@a+MAy#~5yZ zoabLKBNIFGRLr%AX43pmVRmC1lUCB2LP{I+EuHg+ftX|C9(YJrr485MuS%1WAe$JP zZ}VoBkGqLIv4WAS!Drn_F0M4OK3`UY#QLv(yn*2wDgOVNFCh?*`>>mqaf=SkJO62)wVD)DIMR&tPvQ z;DhJz_f4N@Px19lf<;8qn}nnngZhJoxcAO*@1I(Y6G8CiPZK)>!8KKXb*ROtr>VB% zSXz(iW?<-UVEc3d1V?bS9=LiU??3Vw_hm?=RL`6%=dFzp))}h`DQx{cr?tM#lK~sf z9Q$<5vt)Cf{luU`N;C8A&2J<&D-P9rX}$5DeAx)1|Kx_KOpzJ2?_ zjp7S(9msVc*U>Pit;cn!s%2V+O3ez_F<&ERRfvVe2NEAuSjK_Ek|=Y zrrEY!XA5+P{c5^@L(a`DIXQ34dz?nHIze%xEF_;`N{^ENh*201nLk!YS0v;{$XF>L z#|irmHUg!vE)CsCn?uRX$e*sV4E`!2C&Q6Q_`gze480Z)Q_|(h18fHsLS5Br6~!G6 zo}$@Ne!{Nn@*LFr4$ci9;r? 
z+7F+KJ+0)#_cEMJR$Yd{0V1w)Q_fe%hW8tXSjjuP!3B2rqzd^lnv>)(x^fzrjY+a{tNkkuVy)IJ^o9x9gCVewVneao-RWE3;C}q zR%#*th5XlB@Lx^KT2q~l=C&Qncs&Q^YOg=z-7w9TW4i9fWZ35dflPS>8|5I*IBAhT zAhP6e1BpK%-Pnf>^teKq$(Ma8ZKfe_hP>J0T5zDe88TsOGGQA8pvZ1DiQVcx;%20O z0@g&+8aD?|Phriz!*~UERI`t&?z`LL_wU=9s=966!342=7&SX&33bizEN*II@I$#}ZPi9~>REpPtgtQ+whmN;UMEbyl~$}3W~y*LZpem=kl0;Pr`Vn6_zKAoVS-ZRU@St>_J0QEAy=#i7bl; zEDXg=tG7H^@o+WHh^*KwG$M=7SZ!)~Gj)!OWa&U`VA_1ybe9ct9-^oPMJ*_5X_(X2 zJ%iG*HB+aw!YRzvWnO^MP}zdYmMWV`QQ3mZmP1gsm}*CN+P1z0VTyo<>m>G=C*ZY*MV&Bk`8XA+ETs%j=y8(pP!zw<#suyp z5WS)UGQmP}$0JVok5M8cr#K>MmIs2T4fVx;g3e})!Cj8UILL=|P zcql>ttlH4^id5kmB+a8egtLX7Wg%7aF;k841_0E&KK@S9R41r{64Z z(|Wm0Fjn|2EGzn4EcWMr%Y(yyxtW`?NYLP^?(o?{Luc%s$rPSygC{F`(|NhcsX2w5 zEHadd7^G<#6g)caZh2Gg%S}zqf$u&d<%X0SQf>`%+M0Egb<1@ugX+)W7Ec!;+lFjg z6-3pMZ9}&05ZE?d?U=eev~LS^+v`t1rLL~EEKS{kt&{KQj zDV+WG!&5qDFxQvFTutN-X*OgtNNHxiy{|%KFl?*sU4@}o-dqj6%*395LJBMMt#vaNh&-L?QkJY} zt%hi3Voxa!onBOW#G#n;-f%@n){xCeYlYi}o1|&i(8%13$I&Ye?XDq@`JBUBjLH#I zj-Ya+VNP39Iigt*BipoK24B7il_RJesd9$~Do0Q`^3PX}(2i($DUE$Bu} z!)@7u{1Sok63oz6ULilJhO2XNDaucpBtJQZfCjphCq*iD zk%E1Gc|@*8FagMpma3BvB7yameBfcmT3vBBWCR?4%>_&n)1>Oz8RF48Oa3h$rZei3 zHo@JLad z5qXk%$h;LpV56<|iYab_L(bg6mnh)!$Rk)jm-p*>v)w8H_B)SSm;8G)0~LN?y>Sq1 zjJ`g98CA%{)I7>|6MH<#<} zNNHrDtvtzCFjKyh zAL5~{X{ys!EggdM|9>&eugqXyx!ljNuyxFT9;8ICn`8qxd^did)Es#Um@l4HK9i@~~_z9dRBgaLsT8vuy+oE5&w$!^6t^9kM8gZpd=j z?cBA=Rga9A>?ip9L@F0y_DeG&(&AyKg*@Qz%1)9u&42WTC$i$np?5geaxQpv5%zS}=1%WCV{F5Lm)^`v1U9 z(H zu^dJj`5q^Ol=RsHmxV~amwL?{x=j8%eY#)_qgO7qu(D42L*BxOWzcOAJiH%&-$lia ztmtwvgsHRYepw+UlzYCZPs=OhTR>pViKSSybXIW3TRnK#9EPNv!;9h8CPzWJ;bUPq zR;IU5+%&9XG4VK$J22CsJSpG6ndg)!0E_}TGGKZpY&4o`I^`?Iia!nckBakV*<+Hi zbkHWJMO?+CtCgaoaS-UbxyfA~4@}e9baQYhl3QrhAPb4Ii>k28(H~OuAc<>0K(y%hu4VmAE z?j|AlHJZNjN2iZonJ~?jbCKXd?&PEj-}S^DJORvHr!ZMVfEN@^K!3r4l}^S_fj)gM z@h$njba{yzliPSSnZN!shf$Lc$;o3{Q(DFXm3>?jcRet0$c-F0msJ;DPhHrZyo8tK zEO0rQ1tz1nI~l!ywIYK_VDMYnEz6JmI;8DT@ylm7!^@5c`v9>V%=6@`(7=$>K-K~) 
z{&?w=ZG3RpiV(2^J}(82wU+9%sClcZ9mDPz^#4}jzoJ*B@@;si3JsxqG*APFJ3Uud z9nG>VGw8X2iSqCNEh~C|g{w9F)oT_A^e1vf&X7zb zynHI?I>Yn=jajBj-5xxjWgC>)rm4H8@Axz1%9m&m$q4BNR?AdmaWY=ud>^lsC{x;b z_s*A*8>}p_CdfswC{sQ8gY=y@erL4RcOk!HAp}8ufmTfy>RUgvA;1@P-(S=@@V)|d z%h&Z8Hgwg0@OHWGanp4y)6;zF>$(}}TEMv0+I#pfkK#cpArt&(|_ z45F8I=u0pCdjcBFW(qwMFBH*9HhD{C6Vj@`Ssg7DZM+aH6xYmgBXYo4t?AJ{EAOuq z94_8n<@2|>H|OPhn;A+Y7HlgCFZ^RPaQvWGF==ZB`ha68pmJ)KS?AlXb$bcxt=WF!y zT^FN&5dzPD?(-Bp>)euXu)C8ftGd-J{Z~h{)s15#w@pPk{rfcgAKsSBZClq=)0P=s zQz^z=ZGjg^>#OUN#jVSggiW_BOIc%=P=EwWLdnYKZxc(pt?7xmEAMTpk_cKODHB{C zPR{=R4enJhq>gmz^x8qC>QD(#xt6Y~p2aMGt_PZSY=J@o(Tv>!!TrDtFjd~`GYJ(fSfHTkP z^~}KZe8X~7y|}~uk1GqTy;vONV*UiZlamPIO=PBhYdnEKl^KmStF~su`+g7jlM*=AhEFC|%jWJdT^Ul2A`f#G|;$M3n`G7ML5xrSyfj*mh` zM;;Y=wN!gksUD7sJsuSrj0(ePxvIXb7zxh)`!f5__;LR2o+wsHMHO3HD?1g)zLZYi z7=B)+Ho?ZI*i3dh%2@(W@}UNkMi~${@ek7)l1-v z83upsk zqhs0~O@TcWwNzUQZyab*WSzhXeS3)7_BNmd9U**I9_OsJo zn#nJ3bR6rFX-k=cab657QB5#THu01U_`jFUJ!{W~|47~GAFtD$zQh}C62&G@vb|kK zuq(Ns<-zP*)_~r9M$__r<-U9G4NG{hRznqOsq>Hi`02Ldym?{b5`ry%{y*(qOK*Ze z5dJG%yeY!&@>p)Brin?@8rz;5U?G~c($s@ZO#gdl7qF~=f{(_>CZ3Gh!DWX1X1ItkY1|2o?_~!!KjY_OM`(o77BsF zX?kminf^rHse|x~;d&T}^avL2y=%9P}MBm(z^t9~~tiV^WzBu&x zQ#vod7u(V}dVJ^eV5|_Sf2i6MTi}VU=PP=}J*#5=8gW)E@xRhT$c_m`C?=L=yQXdW zZpdQdGlVFOZHBOfdDmB=MLmJ2v_U3kFOyEL)X1g;%cNaU)t`UGk9zCyz{ Wyp`9}P;sD;*Uv8sZ#9~4YJ>o6NS#9f delta 20910 zcmXt9WmFtNwjSKw-F?vD?he6&yK4v@q;dBU+#$FV+@0Vq0Rq9@T_4%q_p46ds_r>c z-F54eTSdK82c=Z-WKr<&j_Fy30q`O45Fzl8A@EQk@X#UfFd^`;A@Fbw@bHkqz>4?c z?x3@U!Bw^KW8ZNH6+KRm01kVt#LkzLv)fwz`SLrBeuVg7ur_vHUsU-#)=?Lls64k@ zJ-x1$YUQ!^sKuzOxM^VX_LZf0g=K9OjAU_kRa){{YQT{VqehJ&Z%;@p3JT=Ylh86F zQG9sbDE+9KmT}b@nLSrxn3Yfj5TWlncaky1!{bbI^x4pyx(Fy(NqswUF^1_zB&TRH zQC^9~+i$gK3602V^6WX1>PGU%OWKk_J%`+YA?rpW^b1cmapUYbDI08K2q@Ay`#3qT zR375?V$|T=BY|s$ahMQ3iu=83Ibdbb;b9+20u)*!2nUkz$2RyO9_bhZgwTBsrgaX6 z%TR$85;ID8TU_+{h1w@me`R&!ESXwluk9^L*veP`Ogwbc>^B0jT^ich7| zb&`6I8=2s6P>`A|$hlQDoud{bZlJi}{o=G9EcHq9bj6O-*xu1tzO5BZ+eclE*yj6T 
zch|B?4vd>_^^Lx4IaD(b1y>5%cOs?gm2E6PS6bk9*IikLD)bdVUXG{hjOSjyTK_k`K!E*g-Dh1xtGqOCmOh3bji)+^UtCuVue8f>_!~G& zbMYir01o}X2XD#}3#|oxdHWvuck@IrpKquT(maC?ZyfYZ$eYb;t+x}0w)hQDKj9;6 zQ!&D3oKcfn-JEIyAWtc0$lJy zgdz(z1-WG#NL1zklIhe3YH_+tQbDzcM}!C{ zS}NIZ?RU7w>k|t0c-BVsV>$x|&qJ$LGDx>vqiV$-P#eP3@X1iIG4km49IJrcL20{Y za{kX;<`?MWE}P>wLBetOS8j5HbzGx*=u24qNgM``OW!W^!G9g-D#PMDP; z|9AmVFY`u`>d?QTmVnH$Nr#|Xs-wkaUV`1!>$ye~L7yBl(#`m}3$aPX&z9*5FZ7RZ z4D1hm*EyB49z&%{90X#eVhLtA??2OBT+^ndDkqSsJMYw|qD9^dH5VhC<{gl4#<3Hi zbu*)U84d53NRO=Ky?n4vlB%~s(x1b}i}3;yM^Mbb?nob;0)X@_vRX5=l6yh0vM8d8 zCgK#y`9l!Q$ANTQ1I${`n^I{h@~{JAi8;bBf4m#{(fF6&3*UW66uvVuSvc@-v?>W1 zyLTU3Z018)!j1VMr>=b#2wiThAlZgGd~9QHBhJwK@_FJ+tYDHr=xumcdmoJQ|Gfs$M91|{vB~T3A6X2o9|avP#3E* ztp7R%j7n)LWV2&SY3{lf%cphiS;%ZB!faqeM2>j407{h>jU@5PkG!v7R{uIR%+vT`w^7~t9kMd)=an~13>k zS!Dld^G&<2$aXoo_)w}Ll8WY6SlVf^$;yux+3|U8j3J(oOXT|86Y=BCMNtW>WPCW2o$G;F*)~`3H zMSaVtaBLCh_cl=xQ~wZk@?B4NO*$m_7LLOl_?F{V@rl04N3H-Di)fMN$cj3@Co^;- zu~3ftSEwB|^!E`Kg$%^64bYg7@#y}+xjM=#9DHQuRRw88YDI=Dkn^AtSaLox4JfiI z!h0Ci!$X_LqTk$ZR&b9HS8I->QObHmK?b{p`~M?@t99`ojAgv;p6J0p-6{9hs*Y9NRiOz1Em_AmAT+ z`}8f@VbP|g(NM(Z0SyyClcTk3-M#5iU{h*TaA1HyH}Kkzy0BYgUdnuKxaPd4h(l*7M~e%iWQ>>A(5{K*BS73Ch3qPIm-8jX5D3Sn7yN6E9)a z6tC43ue8l(>{xGUAKUl~%%5gu*zFLaYOUqJ{vFY6i9#|G{pg=PLLmb;g_T>3Ci-!_ zJl@Vtqr;(Dz*y<@X)X{a=j%(OM17#!79QjcLn6j zKF>BuwrN~La#%80$tAu!!LlNEZ!2TI$F6zv9RoG_80 zt|)lyWRK`wIuj$}%Uy+Kzp!H(anU!db~?b69Ypf=;)(#;zF}oWFm4WLEa#4vDP6>q);Caj~Sb>-wD zG_t8}sVM`3o4vnnp6+zA3XWpY@@q;C^L}&6LbHCA6rFnxzY-v?`tzJ;5$Eg{wb9Kv z*4^X2-WR*Kk9txbaCpOo=zJp)RenTnz3!s@k{C>}C6SFx(289T(?3s9g?f`jxcig& zh@rIcyrAJPC?zF|yb-n5jkg=d^hk!zc5y!Tu#p6C!0T~4S?h7r8n{b+27 zI0KV@c$Ku(UgXg`OTf^pd(P_r=#xq{+oIR-DoRw)yXVPO)}6-56;nK&!d5X@k{X9 zxD$-=5)>rHXT_n{`t;2U?PLTy1Iq$c+!(*`H&@M+xA3*U@p&HzrJy&D(XGOGz56m7 zEhyO4>j#2fZbSEPD8uU|GD-gRiL{#2ji08vuk;A;vizxWch+y8uhZNzYUn#&8%m9s zRG{QEzreCdxYwz&=mm#>+5t!iPxx+$ky#e??A_!vq$4@_8XksjVIjS(bg73@68f<> zg5j&bXZITs^@VP7{m~ehm^+KM3)dfi@9*Hpo!WCJSa~3Sdk&BLL+v->K*Nbt2Vd)- 
z$laQBu?&tPz{s-Zkh6A1;YWK2m$ae0CEP57n~-*xE7)R}Nl{Jy!2lpd1Ws7z2xD|xwGri)EF0RRnNgZ6ZbEn^9cZ?dorbH0;CU~~#5-oNASprepLC{zu z((IQv{xzNCM4qH5TKQySwD{CmOw@{!v~f-!Py|6+5{Xb1v80S9=Z7uprAJ@zD6eFz zQkOQ_+YRTB1|qXgp9LDXp+aM7&CE`egwMZVoI`9t?T{zH`hf1cWzGY356^2CRI?hn z=BK*iu$S7%VcbW1Lp3w7ea;LGvkTf5*Bt8-(T!K2WmQBWkF~oI78D`65kyO>HP8%K#jW{Xd1Qsrt2~vRfr zN|P1LjehCF*!?iY*$EA~1FNg&QF1Uip|~sUR<6AI_E!Qi3tscTS9kvx6)d2LOLn}3 z`cW|N@H=D0=YPMuldRRWsj*)j#GcY*>kVu{ZqbA4&Odfrh@D)uL|jb~}>= zY^5hx73h&jIApZeL&5Kz?%GotJbxnUec@wFcB#c58{qL(AXU2--Lm3m*y8PDh5-1A z`?tmlV85ip3ObGxg9IJZhHjQ4Wq&7JH4`Fz+;CRfbWZV!=ioF}M2$;3Z}g0U{*eYv z2u@(&f_9rq!zcT|i4avI#`tQ=A^3GGk@ZNbU{~?`yBy*H)hC`ag zqC0p+rINA?4%D#RVFiG`+lKB_WIL+lV^)5Y+tBIUDyyno@$+K><&vU{elKU(D8_j> zctak^*e$;$^v}27rhEG^j7|OOhw#P1#kIVtC_s5LSnAD%Wim0+=Eno>1eS&E>(%9Qi8D*%bRuVE?6GpZou78_&XN|;o%5SDA z@-foIV<)!1RG-E(6AdHohu)wm1-tjb7CyR4#4ehUcOfRfo4&Y%5uhnFi3W*w$Gm=3 z>Qt5ttFbv%5)2X6=M+FQG5ndL=g>E-5AQaKE3D7x%7$N#+E3ugT^NVHx=@$MKp&4V z?r+Wgry^(^SiJR!Oz&aw*%b3N)ia#r=c|g zF+v|r2bXnE)PdW8!uv=7Mdv8JkfD$VVjps9p0n6N&tZy#WaziXjfFc#4YRSHiUa!6 zf=qp-5t&x0p|{HHO5@?FzgOCsrD22WrwG#yjtb0fep^47a{y5ui6zG9Gqh@p>&8Nq ztp7))z>ikZsP|YgD!oJYNi-6Pf>9{K1iwUKZ1YqTpcPQkI;n+?{74?0x)q;#730;e zCuJFWLmwkzbDlpUXlbe|mNZ6i;c-Nw=3gBrta(+PsxZS_8@CW5Jp1z&6|1kg)Od^? 
zD>rWxYb$|$!-1KjF2v~=D|I;DLIPBR8hd(N4&U0Z#+yW+m0a{ckW%rPg@1&dR4gFI zN=(2*-%jxS5}nPbXcMljNt#tm_>3GenMS(RH*1~}Z(1Kc2vtjX-ot}(853O)N{x#I z=}gyaI*WK}6YU*JO*Sg(sf|f$I?K)l)mF7z<|TDFPYLu_VW%fCtVC#EVa5%yCZy@= z_Mgr_{9;INYM{U7CkP)ZTfMd#k+D0h`ZPthyNgX7bFD?cExnW6dA^TfFvrTwI>uma zXiz56FXEu31QIb^n3X8{_1s|RnDYnA1@9U826DCq@~JRN(bD%O}LBvoG-hh z&cEG93j>vo4S{o?E!D)-B@}Wr1*fnQ$9Qz;wVgAojtX(E!_@ooO@5&u>Wxlh+FD@M zNpmVl?+CoURg#H1oqe20Incg1^idJ5ILE5(->aPr6ihL~d228a?yy*{TBvi-V-0OP zj2wQ-jG=y56whQ@zfT0H2t%njSZQ{s>UL`@RDeT2OI5LutN~oI%`Q2PO!d82j6~za z;O)Kizs2G|o=P=gI(|cv(C>?SJM+pd_88mYGE` ziJ>8Wo#OM2NBOEU73Akn`qQ~VSrKfPm5P5RdS}zowGfd<#H_#-`Jwd)Ui4#qY8*qv zlmS3W!89+x_v^_v52fKMwyh9#TtOp95PLh1dsxBiIBXC?HLW-CDl1ISg zt31?4lJH`-g?AeNmjR0iU*CDWe8)GFL>6ld z#CGRn0#n5eIH&qV4Sp8!IgCR2*8sL=<7on*{g^0GI4FL6-YYm)HnOUq2bkboTN=*t z0viX3dH39K=R8b8S%Y*By$$>vsQ6aSRyEysEUY1Kmq=JZmvV~j%c{XHJH|U{dMxp2 zjcBgmqJBw?hGZ^g>ZJZT;#E>ahhNl#bHa*+PcBDZZ?-}RUCX})G8jh*AF#WnugFa; zp{0q9AQ559?EGuCI2YYr=qu8NLtep4j|ho=uObN3UdB<~n%ZAqcszZaamn36Bvsd{ z>)=mwymv5D@LV2fdD~dbfrGx_fjeTkx`TZ^zRM(94u>g(VJzJ3bfxYX5HGH z=S5pZrp`_fDdM|ndMPgGM~(3$kV7WNJ3gyKhiFQ50eauGhOvNe)}Ur zAb+l^ozppd`O>j>Ri$1lmdfIOo3BKJJ>8T+J+Cjy#`^d-n;}FaY!nfx_jD+zk-%vf1S7`OX*8I&;Z9A8mBn zmUkjS+)E&Jn;jd6cYliMW_7Hy9a!zUDo%QYa;1SSLz{Ur!VGovB!)9b7ndaye?*^g zitlSgIyh>LtUWZ{tBij=l0u&ei}&N}-xB>dfb4z5s+W1_H2ZjCzzIq4X_iOW=>W=% zUaLBAU9QXi>`1LR3+=nxi!^4n??LzrP^k5t)hAdRVkUunM&B?ZeWB=H$M1Q~gSp?+ z%0n7efPFxSktKRu^GE;gQ>EOE(eT_ z?Q+#Gb!4O`yXj&s5FEy(IaKU5H0?9#1pgLXc(T}B%CmF>>b8eGhKh_OY(MG$Hs>Fw zrL45kUSLrirB~$c4Kp+D51`8Oi_Sp&!2KtODKL+mw}kLy2E7xiVSdpCh#|Xximh-3 zX4|hrQ@@jK+l@9LO4D*ztv4J3fY}CA(U$3LwW#m78A#>wDo&g-9TzKm(IElR8drXX zmd>cG)NiWLHR{ROQW#btm>5iv9LjRj0QEjyT-g4e{{s|fY-wt)wOVKXSfXgnbkf{} zQQ1Ck8Jyt8sLekg=!qQ3T-5y}^&h@2MJjh-NO*Ex?{JJdSan>9-2#614!yyiC|-F_ zkT!hiw7X%r)$g0rADXe>BYG9xROk0R6cw}}euo~4d1lGT z1GhoJrb;>f|K`{acZVl4SXLTc{QXai2u+$k$@YgO%20Uro8>Jj2L6+`ach%shSY{D zeGv&W$_O)9T{!{Vm=yLQn2(n^b_n)Gx7bN~IvR_s3G4)ynWhL<;|KPROYvdBbme+g 
zx%E0VSq%FF+nX!RQdZQ+Ej0#ya5-CY7veE48bI4iT-txg7?AD_<`|9a3>80AKEZFrVb}!tTXZaf6t0JX3&v zsbamuzxJScTYl+a-*RQ}5p`RhaR0k#^<4W(onJ6q2j>3wq`RbVtjq$7EF9n4=bn${ zr#QG2MuYv@QgV%PBa$PFG%-}D1tqthmli|riLk#di|zTBU6Kdc407)}5JZO>e4FaT zfd=~XL(4O=caJ318E#8b))C`-h|%kYWWtZpX4TU}d(^}JGI$ofPyEERS6Pp7*WJnK z_3K@^)1J{(U(?g%dX=)1vT~2uxbxKA%2U`ZkQa73zR2`hv5(lr@5^tf9L^!SVxHnV zqc{7TQ{|7Or(Fg?cIjK`ku4RKpJyv} z@QzKfJB%IUSNp?1Wao}SUm_}|xqHO_uf4%QEE~+)vha9!OZj(EUDbC{CoX`QpUFin z0|sSOxvkAF?!at^cvBZut%&H(OV8SJPRG?Z!#@^C5jc%kp@`qV|J}aPEfQy{xA>J4 zt7T`=ym0O4ziR8u4{E^~u1p&LQqFIu60gA}r{Lb>r=-2=35>|u-FVaD3;fo0^| zwxHW3wv*()gt{lWFBW5JgP{^yk>g#ky9%maK6C6|@Wa&v_Y~w^#PMgWi>s&AEG@pA z5D)TEQz7%T(Ayc<@oygr!5D3C0ZZ@fKN0?dJt|+{g;!2pULC&cj46sB{|Mc{NzK)O z$A0`uap(v40U)q++?IOZX!+Dy+=^}>@^im6!XdosjH}g8X(lO8Bvoch2EJ0l+LtHq z=0D1KNUt^}F?{*rNmj=ES)*e!tou4*d6yxyi5q-*N&yo~@H|C@pO1-K-=VeTimOL% zNYlkr!+EPsLrv9Nxx4lYMN~cfjcmU~)W}Mn+qey2rgJ{&AQ4SsdOTVW>R%$lGFx9s z{OaBFv@oh1Ud_YM>Gx}sZie#W!xqVPx3#Ehe-6ADD~0=o)A~r4WJw5%+}Mvv1 zY+zI;J+fQ}JL2Sn7p>gL6qZ=uaoVqHD$HWZF)4-swwKP{YZZHJ$g3`1wll))wMDo2 zYj8gBE+yTOWTD5Em6eJczfM73d9?_^kDaOCD(kSnNpso?eWq=6hBjbLS!luTN;Ga? z-Cs^u6UK==9vfrWw{NLjaBKN)S8G1JJ~PtDIJcTcU|o5y{fXL_)}^x2rnuL{zJ-~l z%L~K{`Yd`X`feDLY^%6c{4RTPKkYt}0`UQ$FQTU)(O4>`iy(or<({v4pWZ1v3Q4Rq zl>K!33S=n+oIi7VCZUQIJHmdx+alN*km0XB$73ZF+)x1W_#6=-*di%n%e<;p9V`>Hwoimb^LIT4Mm5+iM5Xv}* z@kq3^rVA1HBD1k2bQG=W_hdi${-GDoYNFud|c#BF2^LLxM+C<7s^3sEESwLn6pqZE&xmz3jNy@f6x$y zpz?hk6GT^$Sm4A+oAy7DMWQkwhoe0NT0QQkQTyR0;?qz~%t8=aM5N9qeC$JyRbLWY zEE}|rG?U*w9U+NOKOm30qS$1|Bqc58%^Isx~)!EH-j?dFd@9 zmM->MW;W6-TP!7miNv#MzNPLk9L;%-bOoB{mYL@c4Vrb56Evgiz3J1vPZeAJyFZ8O zZweYvF{$B0UBSgXN;WaI?;u&oFo2H7CwET6(FR38U;-d&;PpmqEb<(`jbzQS1j8I+Gn( zq($D2_Sbh9$}V}f5&43s$e|o|;m>M7hdwlgAN)m%gfL<7Jw^W;1jXy%PJqekbXH^8 zDS48}0K^$T_pp0r^5LcjF_A{Ft#zt`r8FS5W0Pcg5Fb-lh<(9o5&U;^EN)pxT!!R! 
zs1-HgjcXPp`AUTK;N-rrbZhr9{FR5{5g1(l;Dzw`jSe%CF005|7VcZ#^KL}0XJV{2 zxDk0132P7pcg(bil>dT$@-{#1XX{lYb<;f$Hp0IsjbYB=MQz9zoHWlTr!fs%6Q~ArJ@BBPD53f2(FVkD{vWt5Bit#VL z)0_(B34K1yzJalJY~Ac2AdpKn*me+JMD%rZPG3JGRv&xN;&J%Es7^AWT`gJgFd;tq zV3>)R-8n7BClT#U6$(=wO1RDvAO#86jmhkPxsR)geI!94EB#4ppzQ-mW`j%i5TO=} z$sRzSGa&UM@OL0Qj@BCgA_waAstY0wIenFzW2d{-P5! z=IR+JD=c|Qk+}XY=cVl%ue4Sk5~+BMy+%?RQd>2wmv?^J65f@rr!Yqwh)#xO62BGdJSz?c zqsVcROWaC!3n*Qp#^fAO=T7@aDM5J_*<{Og(~LX+GQI>0Ya2e}z3d(TLa=COe`GyZ zcX{T-W(vRnL#OZJv^S_*yv-c{F4SRV&s&i*FVIku zTS_to6+jAq4T!#%ku;`tq?g(JE*kr@ER_ofDTHo*8%Vlea3k849j!TeKcK6pKJIZ8 zta$Rm(Dk4cR6p(xhEM+k7Y`){>*U1)i=$jV64Iz|&Zsx#5|7KJr0dFuv?^&WxkaHs zhw$|;Y#sKYf<7l^%61PAt$Gl{er&9&2lQdeF4_lg4WKKvi1b<$?yh!gv$J$%j;qwk z*hBzLFYs}vW{y?Gzy3?}tJ>;#e3FNSP;d4j+!2yDB4dSU390xEdYRLE22fmcbM33M zx9@C-dPi9QqO4(#itzIiC9kOxH|g`!bBz{60`T*C=w)FlzW-d!i~rpFsm%P=v1=Fz zKKCx@J$oQX{`Ql1goC-vBS!n;gFOAFz3h%2`9*v^Ck?c`(`l}V22%Uc#Ee0U(g0hu z_}}r=LLEib*AxLm3I=pXkz5mtxWDd0{mEzP$#&(8Zd^mX0aE&Xja+^HpP&AQto${v zgStp6`lr}t+GASwfvbZcm2HyL_hkXg(E#=Itt5)5oJ)uI_iU1l@%|+Q(-x?9&PPR& zkTsW@ILh~He3Oz;R>y65aB z0MiPAgH)m|Ixv$&VIIc_;O0Ms77<8Nb)LY+9%|bfN13eF`?7Z7Y%TsG;gHtsO5iQJzP+!yUha;IXVQ42EQB4b|bAl>tKSl^_)CEP4vP?h{q(rUbi=Zc6rJ_kKgjJHhROU`x;f+NbhuN{X>dB z`wt>Ip4f8cC6J8LFQqxUJE#3Mu(?ETGyf_Cose)=&^#g6s#k>@Upy`I1tHqcS56G3 zP#TUi_gK|-uF2KlO2g=r&{<#N?!;&eMsPlC_8KC%+%=_oi?NZsbXws~9GMyi4EP!pa zF~J1(%cRHs_veC|pYjIQ+$v1q+drYS-^qesgOho**4$4%Kh32Fwo0YTmga?lN!1%w z=3NZbfA1C<9#`5PN26IMq4vkt&n-1^#T;Z1dDj-_I)TtZ7~Sk7yAa^V}pH85dk$glLP_(?{4L`C4YiK_A)3Va%68}c?;vLu5Y)?+jqyt7^_nLXI8CzOp6X3n72)v|^N-UtMwF(j;# zP{P1qO}CY@EJXYb>1)CZJ6CoDIJXS;veiL)%KhxR@cA|i%aFL- z#CdcVm(;8QbeVkSA0L{)=v@St6eJdX!5B&CiA6r$I^>ND(D0+3@KLtlI1*L`*#0Q| z+t?IsM7HlxgekWp8jvzq{hy$`g;DMMZd~VUpfdJ_*}hAdF{Uq#0TAk?x^vc>5fp_# zKeQ!2cbCOjeO((v6S?Nf+U2M>YsP&17S~g7AoSIuQy~@e4 z93Sd?qjGfA^@GCWB#j(0-XSXKHWbFQi!L>RQ5t9N4EcZ=8a>&Nfg;;~*q6GRYhBG& zzhuaRRkZs}mt}$b6A&<}coK1|)zC=rqMeb!OA@Pf?^ti+U4P&`lFb`WO)e*z_^*uU 
zW7|}%tpE%6pWoZ>S%4ZrR*kRD8YBmqcnv{)@Cllmv6gC8bsUv@CA~E&2_J3=qo!bY zE%}B@u!c%DqO>(#Uz;0}$QG06kwdRPsxv8iKp)SFDk$uwW{D z+n|eJ_?(4`d3O-=iate7)YQLXMG>LHZ-c{`kE736!vCmF+^vpsmsDhOK4;>Im|YrlkpxxK z#(4ZXp_8Yi|dm@vDyx16^A+79H-gBfMZ*gXAdW5CH!>2Y3{PG70?PbRQD7> zS|T}LYLk}ydkB~7rF;5O`rY2i72qYm1l*%rp)P8|rKD@FDdel)7{fBN+{va6#uu+L zW=ZdV@cA8Nt^0KF!^Ck8QK4BC-z_#7y|y11=HztO&yY_(`#qiPD%|~jzJ*<#A#ciD zfA;eRvYud8XNa4wU)wu{eVgB~d}gR~u1S@vd#J`QhRK@lQgYEN%Fx^QOhk9Dfq>#3 zDjju#=Ep>Oa8 zi=W^-b#YFV9hecYyi&ccTUL)#Fch{Bbm^2T$Z<0XHK)qoz254GeTE&au6nk@{kzAq zp8qcEMo*c(B-VxHzk-wqUjgk|TN(-z6HT$|MqItlVQczK6%Cg3j(hJvgCmyZ*2wQn zPy_$B;jev-htEu!iFgm}#6Mt97gj}@M&K|Sv44td{x+N5THTr=s`=br1jf|j;rULa zF*Sz_&ncGMf)%iPvUV^&!`wnL3*VncMmI6%Cey%7)HZW}oxf4--VNwv48`UA$rWy) zM{rFqcY4AYan+x)2~@tqcTF$)-Q%d|a{lC&9{TE-Rb;8{IO;b3$UiQgDkn}2>nRQ7 zPkNy-Q>Qx$vtnb8ucE2P&`tonx^9+vj8MM2kpX1aX))7Qrw(ErmE11#`Bx|34?^AO z>3K2=R|Roi@fN8r*nxWSpY)m;8m<_^+1tJ2@TzW)^`b@P>|?>r%Ec5U|4vSP-T~u#WDz9g|HY__ zd+2@RJWUaQTkG0p*w}Gzy&3(2J%~LQ5-$?`oJTA1UPHHYF$WY`rafNsE{)@fRgjs! z?8L{R*7}9tkV#Y+)eD5K4Yqa!X}>+ir?iE z#wMNZ$vpDlgPEi{wkT4hw4sVk_60+UlL!%1F=|hIN9I-rsIy}gKnOC;>duTN@pWQ#t0cQ zvxPHtYT%LOMJ}IQiu$NP#540N*dFGgpX$dgM9ee-SxkM>VC8y*i0okPJt7^y||-r1KLLVZ<$76J6*QobUWigiUknf$=wEqbB{u{#H2WCyA() z3{!!NGb$!A_mb@Y+Ui7tZs^&)+HfD=3%vnFC75=799EGj>+XM5^JV4AyiY~M4##!$ z9@fm~nHiHf`X;ZMVl6EIYw`zk6K*iiqo_1Bt=(jNRlG=YhQ?HR-T=3^eg9tKys~L> zA(*l}qe7*by{RM@Paul<&qPW@v~EXqm1Bi??Vz3xNgZ45(gaObcGKnMV0KjVXP^=? 
zriBY;hZ|Rqk*BUT0I>m|b-5(;FXB9|calsT0z8T6K1e+5-LV;c7VGO_$+C{!#sCwyt6KmV#Fzea1Z2}h&TxQXzF*?$O(}0b3;2-($9om&M zVIZqYPLkBdPt=Mhvi}svK)n|D8$pV5tpj%7rhL}r4Y>~j=h-l7Rcnnp3Y?firj=1$ zdUtvf{(Qc*mcp2Q0!4-oFhF5r#FTO@nM+A#xiQjHrWb~LqF;>(DV1m-{X8}G@)%Hj z{T}_J`R{ROQ0u-VBa$wiPO4>Xs^?c5Tt(48pfC}#(b<;bz8p=Nm%>({sQjq)H!fBk zIO^|VD9oM+ndIGa!EpbL{h{h8qR;4|U8s_#=|i%-+vv!h6*&hVwP$gF>tAERoqryj z4`traeSAE${tSIev^hSa&kS)6bqjJpNK30w5!AVoix1W}D}oP247@-1lqoRs*zdH( z)v3V|891?*R`Bg}>cZgU@hwua8j~ZLY zc)QkFgdrIS??O4hvmT)s=Sed#iv$7^4Hk44=OC*o3>!HH%#OF_&eL7mQWj198LqP2 z!QfFW^CxcGxGub>G1IoRoD|AR=8?>^0l% znZ0M0`2-&z2hn3dFp<&;Gav2{jk4t|ZJVS)xmFmZXL!SM|q@2T_W#1G(f_SHGz! zS)C6s;yjONc$q7cg7VcEv+HxTB3`J;F#y2Bwh9s;rv5cf7Iw=hhpO?d_2;^W>&JSx zK&Vx^^qR6hFD^JbS1yYkx?DzY!K&TB`1j**qzE#K90cKe5`tum&77k)E7_oGyD~cm z{&p9449N?MNHn@`3GBE1{QLA-_MhB?0=#9Ga3Oj%Cc4ZL^+z%nPRsF#d*NKdtT3kv zCQoX>)F1xC+?QidQuyUQi1Q4I+m{{c=hs8xXR5N!x)(yM{7a?Ck-PiUm^x?_FJ8)o zrTc=OZ9Y5|RIJc!ct$_?o!WEP_)Ap6$h7$3`Ym-1z_4BIS$;L_AZKhMA;o3IOmTZI z4HHJ_o3&_v*ybjr@+(pP_1iZ6i>lDjQ|T;V+S>Y3Dw5COu4!Ltl*>O#;hHX#AK!_~ zn;kda8KCsL7@sIWiTP0N<02@8m=KX~@DPkL@o<5UH~RZsklgsb>p5Umn-fAUy^%5a zP&7H9MlQ88PtjaNP=yhsBT_)IZOSuDSFAHOq+uVAbs^X9nC_`~+SwyD?u_ACRGJO6 z9RBgK@jc4Q|F&|}HXC*ocC|uu{?X<6>fyQZre#IoWrkmBCpxE8^CtAmYoRlM}pi8JA~@-EU8`>yu8(XOkbjH&S<{*`z{x4x;yP(MiF)En(k3YVyfTMD@=8@*F$q9 z^C$W#H<_8#hd!DuqoweNO1o*YUB~qw`!g56gPNI$bqwmtbMfrrrJ!{rRd6bRcYC0b zG4&+9E;Ed?vyO2o5m!$5EMDs8t{+)m1+i^_7_MqZ*iLs{xMBmv`;JP_Pk_!IA)VhgLTR{H z;7nin!>fVyI^Xy4IloSdG{MVJP`nF|zxU~2EHnHpd~Q9L)m1W|T#QZ~!@BBMKw&=M zs5X7v66%w(8!PoR=5ZmIk}h2PvvZfQ6(=>;!YC6-`KG_#t`gfL$ z2#RXO?t7yErp3ORqHpwKw=~ZF;ty@%JU!ftcGy)J@@k}P6u#CmX|q}_C{bvC_>nQL zF`E@JN=+9Ml%pkjv3wpD$oF^&F$3p6b^LJ;igHyoCbqQ6VEn5HMcp5?aLzTGmLju7 zSjjbZFG?E5V52-0fB?ZpW<1y*zLH=s0A9?O?}so>x}FOQm<8gE6X9+3BDI)$TSrt% zfSOc2J)a^%T`nL#RXjh5I+*5CAJ3oLJBWMm^6Hgijmj{1)T3Qd4YW=?rClfDv(;jm z!(S!MHG7A$hDxk76hLq-C&C7?PE3f+M$-vFs}h5t|B?3n{3Er9pLtmo|3eGY59Wom z`&>KJuxfuq#1$_XEgNmIkd}eeSA*mThYQnD>PXomJFsQ{Z|c7THP$WWrxK~XCu|fq 
z{!$@|>6_-n?{u=+xy+p+MX=H~8vG8Bb&Em|wblveK%C4lH1%09DQi)l3-- z0JnbegOpWo3vgu}@3rp^&IY`4rW^JNDO!s+gRYiR(iqX8A}2y-`xR-b$v2yfCZ-Xb zE>c8VPYPzqncR^#e@p(%IHSazi-DYL6Asxr06eCOK^~w}>)(b}P^bIq`A3ZG%<2Wn zCaTi{B=a6_l}-Zw0;rfCqKzYBo+A)!nmCFE@)puwNWKXuVRqUoFu30{-uaO^#*F@# zTW}mCn8$;#s!~deCgR`n4aq_?QC_50($OCB{C?=b*`@BE3F3qJVw*JED50GGx48`;bC=X=S(M@BW$T z!I_=?ih@T7l&b!Mm_v%MbBZV~`xF{{-Ln`?iGsV)!;GQvk-ShlTk{ZHVdT zSKtB7IXuXGajT<>Ri()m_s^>X);H2N9m{%ncvdjS9uVaKN{_AN#(% ztU^W1bK}1Gezi4QdoOU|TKVGcSg!pk=GI7x9+lp(USR5#KD!#W_3z`73rX$xR4oqr zQ2yK<*D23=z^l;-Gqev4FMJS# z@2@ZYUw}FRbsoz@x+m1BW6k+=b=t*_W*Cm~mnJ;x0OUM`uCqVL8EBTJd18V$^+3a> zZ~MC8+D@NXIxfzD1;}|oprlYAYw${^<)k}XZ;3U9F%CRvEI0))J7(ifI9m$ucrw>C%T`t8-V6COvHZs#AFbpOja!rJn$%G!1{IToL4nH_doT zeS!kY&@+2xZPXKUDM4dS6DD3}60bxIokv5GpR;`Q2<5#UqPc_w!uHJ{N%k1Fcd?;q zHgTxt$YW*trr~0O`?_nJ9)#@=Eo`p~Z$j99uVMR3F`tg|laq!FEoV>MiEUh3DS4vG zF-f_xJa=NDncu0*3!i^$W=-Nrg*2DKm}q7>>EtsJb40nBufghOD&)81wiNXB%Tg;h z6OYD^SijWwiLz`)O%QJ=YkJuNXqS8*UxMzj43Pnz;_m>WZ~4Tbq(2@8=9G0 z)l@g^i|1=GyXtC9W6tKrUJWYfYN@#|Vvk9pxoV6zYua)*w>5vYrl6bU{w>->c)3=uPXs~mP2W$EGv>#l37N;%7^Jq2}_+v3TP zM$Z|q^ua0&6fXu>eX+KNM>qUP+SAnjU!{`psRiLZKKvzwzV_0q7adtpg&3DsPO~ZQ2G2NVV*2+S= z%I`h;!ibmB1pVLWGzz$QKha_aT8=HI323T0C5ZB;JQ?k1VXBSw>Ae7&x;PC0O@#zB znSfSfZ3#fMNkH@u9uJXN?2B%Y7u1YdGL%VW0VU}`*3f?xw(?*60R??CvS)OjYeq zuI=bLb}hp)F#g{dQqB&cnZ`KnM0h$27~0ip;HMSQp@qPd5Q8fr&Z+pe~Y zCJBw94@+J$;0$MfZhT04&@Cp94@*=_V}vhqw3_{RC2prb#94cuJ#` z6-9q{&t&*awD%xJfe|0eAQJP@=Q1Sd$X}wd{AH3H~}>TROUUF zr*>Z=3Elm4;`FXntT|2>^bv>9XZEL$pdPkdLa}M%KCxZXv4g-Nj@Gw`nk#)lA8|{x$q zQn&fFI{-HTZUEc>xE);$b4!|iPyT?fI`{O@-Rjy_=L-lN5vY#OEG-8-4 z^-WBH{~R9w!N7m2@}HZ@{}C;u<&9Z)R*1P4{+Sw7P+EU$3)Rgh+amBI$Xj{$fGgdv9^?Us{b5 zg5b>`CiWHt*HqQP7QvpT+Kzu?X?>!bfuXyB?c=2&I0RR#1y@hy`;Rzq{u<+lF4ptXL;;vpH$9s6`g)40CQerixbg_*_n7B>?0ibD;!_hF@>LXsuu zi-Z?31f?M;4b_?&=Co~fE>2xGI2-M z+ZMKV5T$aUOHewXNcKb| zJ40TWDU@{y9#Kd!Z3Qf<{XbzN(V zso7{dj^Ww{lwmrA-m`x{0)bVG`-W#YzT;UM!Fpc{0$asIQ*C2`K>QQ$^gm37Z(5UU>yx}+IFl%RV~vpRBV=5$6}3~bxSM& 
z9{@heVKEcH2Y`={03S`OmZLcx(`;L=vjf~=zZ-YRvAHD&=h}bV<2;hp3Gz2eLUbmE zbRJL8J$+{M=Sn7m0g4n*q|hS+7FjX=0~P@+qExJ$vu=f{$11xu^m(pc2%pD7k4;%M6;*-gnifLIjZMftQMC(!1T3U;g{)HEUe0r_}l$W z=e+m%rsLX9%QbP!c8r#d-Ifk0<`8<)9#M>;286hl;aPv0YG}S;Q4RaL?mL0w0E#(0 z6k`=x72v7Hd&kTYmfttrz;!ScQv=18hh3ZKE})+S1Pb`**pGNxeu}lt(uXL${;NEa@Py^RAmEke zcx5t3DNAzlk^+9Kir@ZVVKSVnG5A2^0c&;TUQMZq;|ik+DwzWQ1^f&6SF@b99skm7 z$HIT6j;*%>5w8~k{{sG1j+I)#zkq*z1pjJU)|%>cG`Hcf&MWj_JC! zVc10;flO#5Hp&4{DQclV#LAM2HxT+sb z9vNjc1W^l!T0qp&FsE&M2Bl+brjBumDJ;}wUJ9duvIUeaWtxdW*#gRzV^Fr3YDah4 zw!Q;li}8M8i)olG)3x;cCeoI(*o#RW5#fLGNx~9Y zN?CkDkD{1{EdPBvLE@bRte@Y2l#viU(ugAZ*Ep6dCmx|B%>qUf^pm9nlni*BA|A3w z9%W*=z*81bMBlC*F|01NGuj|)hy~QwX+1E zuGc`J3W#RPpRSi0`5f|*4Em?#hVIs+3Ri$MFZL94*WLV)F4lac3e0BukuH`Rxt{9@ zcVb)i<430IsuRw>S>2|!xlLg#_gh$0^toQXpPM^}y}6nDqDav2weElL zo^peyYwo7AxyhM11)VMv%9st4q*xTZIPP(EQ|{)bX6A(Nz5;RsVOML^qtwv~gZI?y(tZO1^{bhTsZ^3c8=;I{Xleu`aPYgw9FAGh6!nHAke z6#XZapE>3c^Z#o#LZg3{qd7*2qS;4H(Ey>X8T|pFEhDtLALW1%S*4Zc!jckDSffxF zjT3n&wnSa>WYK_0P9(8cN`|uHNS{T482Sm>4?n1`%5-LpVw#wDu`o#{P-)da-#~G znp%1?5Js*>Jcf$GA#Xb74?Hg_*_nzB87JZd-2eHVoP7<|^oAruGz+TUl(a zmy&>G$=od^@tS|uDu`yL_LTh4>1Cw{57~nEMr&?l1=)z6d1e7BUbJ~{55zP{7WEnH?IS90BFXKVLb5JF3ya zdRup_cPmGBpc^p_w`EUzRqZrw^dR4E(Vr5;BXq)IKVyGslGDQ8M67H^2|7RTpwl8J z4tpb@u}#(64so`QXK(?z#qF#yOLEmbF9SS;pS^o52gY4zCSh#=u;%ETa%CsqId MP?9@ueh~tM00Q_}dH?_b diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/entities2/publication/publication.gz b/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/entities2/publication/publication.gz new file mode 100644 index 0000000000000000000000000000000000000000..96dd218174dbb1fa6ab55da289dc116bbceb9963 GIT binary patch literal 9056 zcmV-mBcI$KiwFp`FK%N118{X>Y-wX*bZKvHE_8Tw0PTJ2a@$Cf?*H=?6^_^mIffV9 zZ@oPy)Uw*G9b3}MvU|_YnNSpff(RP~H~=A8?d^Bp%qoxsNRXl=Qlg~lu-g(*tV?EA zR#xVhi63u9rJuybfaPyI^5e}FO-cqoy`lMk#aD0s?2tDh9n+&U%J}m*y^4$2Pq0ly zlY&8mQJ!6~l%@fr!8p!R-Y`gLQN$s3RooQuaFpd^PdqQ!_zirGw@B%b5BD3*uUIi= 
zAwDUB0UI*$_$PxyF`#1a< zgAU*!e2?MxeV7;x0#+1xd^s4;FYoj5-;YmzKVDc;nvDs)qH#izTy4t|on8#HkS$wL zqX|wXq3LBwFWLMo$~0VHrYQ4C%ad!C zc8}S$;L7CFcQm>Q`KTj4;6MHeKjt+320yFVSro-dOe=ZePopwP7CY^bswqp;*Y*MR+Xv*`b%^L>VXQJA z(=e-#wx5o(G|t+`Gu9YSblD-3j{fA|XGK=#0UO_p76faVc-KB(kSkOf2l1F)Ogdg7 zUyJQf&NA3$aY~aTzP>lA(lZ>>RQfdk9$)n|O>ugPr5pB}fAXDM1bIA~thc7Yz5plV zfF|7odKHgv$fq)oXu!IM!+1Q7(@XM!7A!Bw-j{<8B#SMphmKM$hm-h;U31$KogGH z5JWmG85w8TVVsk+9I`ws3mB`2bJQ02oe@qS1}@5AKxjdd?3#?SYnJmVltaGc4xcaq z^-v~av|_|q*nu{~5op^K2JR4=hJ?XlalS*EmOzGanZty7M9eC0shB>u)=)c6}N5WYM@1rLJ`ao+r)A7@E+iL4OR z!aePmpd81fOvX8lD2!N?eit-%RxnszNOIT^#ZNE^AWIg3PR8CZa|Yi5+Q1@%vET`= zzsna5gMoJLf`kDD!zN?)aDuGs0};Zv6OLqg(IaOpVCk4#XZg2+4CocCWsqH`eEB7i z4F8?1F1!Khje`Q{G@%DhVMND3wiFg0WS42o*?=UEhoCy^?1vv=YQq^%MGM>o1kRi+ z#73N1;U()vu-Fnr5_LEiw;tIKL*6}2lAF$C3VhOVj%CDWEX7HOI2BkG2r>+kW@!Q^ z0I(MhIoN<>KL4qx0?Sx_mBl%inUe2}y|aV2Fu9Rv&jky3b`!INPmz5OSdt*w;9wkc zvEtH_j|qw`6_Y(5fD|EJQLJ*1P~1j*b>E>;%zK7)!1{)KPa(&r+boK65Q;Ih;=iWs z8pS?fqcM&~$@{?{UHryu9TO%Av4nrdeC)kX$H#9$+~Ne79FKwQ1bG2_jFYdDNSrML zQlOOrd z*NyU+r6GPBMghL01uPeqK$G|nG0@&}+Tp5Tb9Q0iE?AIB1Z?x6(+qS|z?YDvNU^t_ z`K`M5{AM;$W0R9YdYEU`$to*^n@iaJ;_uB7IKM76@H51OIQJDt|L&^VBm#(2%r}nSVbj2 zM~8=pV8Eayk>Gy$1Uo-K!9LGMmHztA5~QXal0$rwisdFE=N+(&WfAPYw3u#c{F< zMO{q(^E};GLnU;W6S=zLXqIJ}VdRFE?`VRt2OpOh449Wh$y0!TK!QP^oR2 zx@!hb@TN}e&*`@mEu=FzEsJ=Z`GV*Y5{txxvW-X96CAy)3`% zS6?Xl{!QQLDgA_9(gchkvEj=+;nx48hwyt3IKjWT`a&Q#>T}x>{j+i!fSLg#0-1J;y}+%LakWbt(`-ChZY}UJ zZZQ^kn})Orfw4fp3xjIu{K^r2d2x2N&$lx-=k~cZWT`Q`)l#;R`As(4iur-nS+C8>*5(N=+~1^9yzF- zEW+hhyR{cVj0RC5*SFK_JLS>{hUd#&&p-eCKR@P=gjahznM74r#hf1BqOD&Xck~Dw6-Pw}m22sW;#<^W zdZ;;?PeWY|e2Zxfqw*lnKxkMbD6*_e2d?dw64fBWNX2y4Nmeav8OJftj+!zu_ZEKnf`(C1j(uQAF8%4#@*PE^Y>EJe`_MYB1a zp@uoB0Uqj*1_2KtO&H1@@F^2e3a7jD9^URf%<=$^DbNNO3~oO}&RD@{9t36ntb!{Y#-m`>@qIK`OTBDQ{4hTcae|t zJ6%AsNCWt_J|Jv@0+ajv8SitwYZ$KHbq&p0ln;)gBU6OlTByAVR8LC9GnNVsq{48z zuA&=K`#)pt-{9l??yeB4q=t%J8!I~j$cih-Gl&Tb;m-p2%iI}_2>$bP#M~Muncm=AF7R8pzpn~luj2y3 
zX^7v>vT`iM;l|J7b{+RZ0^3W2V=5>q^;|JL)Alswzu!~}s^Qjc!qK@7;~x9TZ(qg6 z6bBQm>J)c4*FL~@+wN+X@?Ui&cF*060+A(r=3F}zBR&E@I$+cjjwsesjbe$8Rutk60PZs!y&3I)6BV;>7F}l!xxD= z{rzFM)7#=k_pxI08Eo${E!d^8pmt+++G^Gt4j@e5MRUG!-MKBg^!OP~?f1&(&bKhX z&u1SdWBi-G|8Lr_@P~azLFx(EC5!?eLI3>FM=d>Zzt>@DXgwyF4;I#T>ddP4I0-yX zdhF4w)2uD>w^qz5%xEqSAuBRmO^XcEv>eq^sS|{eK|@V*-N*_xyVECik8_I=>?-Qnof6le2E;5JB? zwBo>8986LYUX&zMAxjW|wc?eQsyVu9+6qtizCB*qlG@&RaKLhYki|sy<{0;gmCO&( zNL_FCem8gfLdWwt>T{iM!PBQL$YGkLr*H{9;|{weTXEwVIh#NII!!Me*c?+6piljx zzgF^$)NjwmPz^CQfrctjB*}tls@v3>OJOG-)vdV6t#a|EE@fan8Ca5Ua;txLE4zIc zFn_Cky29zyn8Yba%2L?emYuro_oxNG?ky>#q6P0$cid32go`ZU zB1^c)5-zfYi!9;tEK9h^6aS7U{y{dx9qN%st$EW7(eFy?%-R9>Wx;XQ0?<2z%^w#7 z85c*F$kG5ther#taFU>+I(SJ7!Puv8Npnr#vF*qWG{taT&2)X=v4Xj%xV%?NZ}Q>J!YOu*cTb@M24K3j?El}U zA8!mi>)1x$P#sk_w}Yx!29-eErgN_7p6+^v(=$wKB~}9Oe&{gjrCHbuX(^W!)`4?`0IHQGcch;GecJ~l*n?F(6LknezRRh z)n-MbO~Z1cNDo5GfOEz$R7JC0M^s4KENVwe z+)m>)-m?_$$;mxDE^%BeEa<2rD)Da!YQ+7cv%@E3Y~7k$40aZ*u!_@krdiieXu79gvQ#Yo_CL>L8uxk`5MT z^eYvKFTikHu@&7>6<DNt=%|c0}`h7|xeF@anTo@$vK7v3L9n{Qcm4KJwtEiyOFG-@w~(Lcb#RwqAl-?m!#90(G{L zy?dD@uQzwM#RqgJoR{cBH9b}HG`;6;3+DA2_ttL}YKwwte#tNbK(At&Vh|r%q<>Jd z_uJ|I(OZn;UdIVs&te1v#E9h>(yv)g*d$vcj0=QH3CIQ|0WAe?Do(M40S5hth`8Xe zsE~uG2y71@o_#9>isXQoI3Rxc#~(Bg$=)xpdolz(vFt4w(1N2oQs5vHSg&A!3+J$w z;9q6TyF@4lQYV;#i4a-9K&lu__=ZGfT0JTdUezt|luubQ#m|fog;I6PFFBK?!&$>B zY!2LZ!W%;2t`lMme46*nI}l&0BZ37*iXsbQtf(lcJpp#%A}$#p1;0J!&Lo6x&5;9f zWpoVvLI*q?gM1;snbS#5)?CpRJA>_`bm4z0VWaquECV&r!!SIE42fW!0}djhVW-(y zmq+Ii&N4rRYsMiUuxCaEOr%GC;f#eQXXoA1gEL@$l2kIn+X~bNE)c6TT4&GD6r1Z%sf4adGCbA59bZ8-@ri;IuA&|H{6WX*sMdAEaW8EWN4l zYr`_l_-CjAc;Om>G8)1~lZa9t6|HZRiy6ts@gE#RhTDFC`v@d)&+V(KSeZZq)msj4 zvH;j78rrjlVz&v^fz<%Yq!dKD&MMv^k`E@dFhkyZ5f=!03i(-pk_OTc7ehhjxTpkj zvY)~jR>6e{)O#@Qk&`kfG(^`s2TNw8IyMm1m@q{7fNF_T&?X$QR#3!qp@0WT$w72} zPGiJxB2zjffvzi@SOE=L$^~i6QB4y8;53{HwV#0i_&1fv@^9i~tHpvk07Y0|G>ndl zy4MC;Sy5L)Qw)qtXp?MDc{r_kI8CP1;zEI^OaM72Jyzv5g!iRcZlnHlt@H-(WHg-- zJo^x*81&{J;RNnof`es65J#6KoKQI0v;&D0dH_;r!*ROKePSDrg)w?XyL{R8Zj(5p 
zS47TT#2TGe_KYx!;CvOoVD1M3^ufp(+u5N4#wBmME=UmmXs+p;87_`F5oS%jxFk^b~3pN-v8Zgr}9N+3XOt-s+qlI0^ z3GHrZxRI?Ix}ipjgtWYZEX4_g|sSXVd)qfbI zMWP&D5z4_;EK6~lrAQ>op;m9J2(nRG^nz^Ik5kMj>toVq9`>N^|1E;i&yJ~Tx~kcl zW&8ybYg3jVz5Chuf4_dnF2Dc%j(CY05<&*kUqHAOy7N*W{w7`-d#c%8&wh~+^_Ps7 zkZzcbyJ0H3ti>#oQTE-Bvd>>y?-yk^3{TT~rYXbhGR!W+?CXZv|9`0|sxD*gGS)6* z?K0N>Y)+_$#@c15{iTH3@8unR^pJ!M#6Q{%PzK^aFUG;}qSoE*-Ws&$sRR_H-3eST9EqE_{heto8Ia?BWH!wCMifMmqFEUuJZ~H zP|PG`iWy+yMviKaGu*=MqyCz`n1uo>fh%5*EC$x6%W$oQ#+dgm^M< zhWwM%T1u@iCm%k4Jp5VJemeT}=`;NB)2H+Ov-8va_xLk>cKq{{jz66roqss|xh=C# zdU+n=9Ch}cLo0T|; zhLmW?XO4zc71hwJj;tb7hmCQYX@l=d;wDH9TPD@-GdI#@d8cji^$Edcg2l3m(}p;? zlPJ&ioXV2rD_JhzmGnYDhb=y!`&4mKJ;U@Ar>Co~tl}iAI7zsigv&{|oP^8C0!}aP zaGFd>zAQ;X`!*k_>)a=}(F6j@``s4rcd|y4tkJZakfSF;qwNsf=%to#;u)zzNfbO5 zbFzQb*Ln(8UAhQ^{xXP*qINaznoumy)O)7pw$^l-Vq7qwg#B|!OWdO~$4m>VMdh`k zG!MErjkQZURU`DPzP-?hY-UlTszsrxhPw@1xfS#P=Q!AA6F|Su zn5k2$t6`u8zHjPIpeljpTV_N}M{9cg-kt3KN4N5g74z7KM&wWTL$nMuU)QZDRG4qL zOjE-!a3U}nf-PVlc1hJp+(Y9~ z_Wgom5r{-Ty>#@Ge21NcKuHMH%YZ<=l4I)8;7``Z@lV`RDhAbsMKb)3cF8fKA{aRZ zDnwZlXZ)$el(7}Mm^fO892EsoJLGgk)UHML z@XGX7dLg#vh1eu6@0E~JV)7&=@8x3hq#k&%9vBUSg@vD%{ztfR9pJqbcH_~zP5SRo zKOQ)?WtnQ*yJ#c_@58}(5${m3aI>t_O~ue$&3)V=Zi>c}Zw*hkJXPr_rn$0+8-i)U z*dMYm5c1ng;{m=IRUlp~+-qYwhrS>rTbZxSV6h7#$1pcs*5`Zm=?>8;rgZ>WzsSSw2F#l<9A6a}syGS^iiO>s>pP~AvT zEnBf2O;=4l5+QjBYkd^}sY$F%9t$s=(T4%I_$8xXI-`$x1V-}uHSqesGs);z!RU7a zW>82@zjRKYR{l3u#P{L01h2m)@VcehaEI&2SI$q6?%RFo`~>N~_1DhNkFpEzotNnO zDsvpG4UX7o7}O)lMZe}_t%2p`Vg2C4T6)W$KL?-2TQ0LyWR{A|QhB;8l?}!-Ix!{xFGSWdHS&(lJMGUjW#VH>)lnu_L{hGqD^;y6@sWwy^2vVEk#M&|SE4k=|m z&zZ7fx4m&lSy66;IJxtBOO1BQP6#tjv39(!VaST6uNtn$N3ncqJj%Gfhvk*R3U9 z>%Hz=F8Yj_uHpDr*I~NdH5@JMI!r62X zg*u^2O-rR#U>K1RvPQ(B^=0Zn<#Z@9VvMM~z zeKRj4^y@W2zZ64P&H1{euM+z8D9A4f{Mvco*9LTVXR%aj{j6na|LzB34Ggv!Ov-Dw z)SmM7-q4+w`tUdL%GguQ?s_)S0*XxX@a2JrUOP`0JWi^!7DDLZI(rwc8qW#-gRj=M zd4*XGZkd0b9NR14KT@`97@w9acoZYV$8c3gwRN>W_0C<;G`yI2=Pt77qVmpN2=Cm* z%sY3%y>k~+@7x7?=Pst+Ir!-Uy>l1BJJ%b9(Hbx!!DtP*Ye`KHz6OPm&9<%%5QpQ 
zi4H)RX+NYnTY|!Yj!n$v(-qG&)3nk1}Wx2Pmi>jfdTKA#F&gKmJTin$a?WSZMxD_ZQYVh z5|AYUS38s=@DhZ}~v@=D5spiMuIWX0&p{&k*LO2pH^&q?y{L>@HA3l=3k1Rx4e=EHr z5+t=PkkqI1{j>Aa{r3_u^=Mum37C?AsV4_aiRWNkKMNUI2PO6RuES34WirptP2U3S z+Evif3i6^;sgl*kv}&I(*9yGBN8aZG_ZsKMW!BjW2LTu$RKhWq*mgc>fE_jy^97}ATfMGV;7vz>%~37C^O z;#Oe2R-?RrJ^8e-_+Mj|gxpQX4fB9;!{nR=1MzBM(qz(8%7LPjVFgLUISv?2#sgA_ zY2WLGsZuDOzfc&2UUFx%FrT?uje!%cDWr6~cIg16u_z8w8eXR~=pYU9*pQ+i6oaD5 zlR^qaO*eoo5h8Lq8)1S4MZ`f>AX?^{tetD3nC5HBH8CC2aBWp}RHw66uE{iOgw%6N zHlAuC5=Tfar(~m8)*K~12Rsg*(0Q$Te9htlUfD|nWmR)IOT*Vk&sz( z$qTL0yZP6%Cvm{iLOALdV4>VW8(%Sjf-3kcIjb)eegCF!^py6Tku2tISGc$5fhIpH zcU%>c5nlc>jqg*-z3SKV744#3%H6)0ezT&w&9Z` z;U1(jrC4r6Z%V=3kh&a#U>4HIl#22x`&&>vJ-gft8`{;`J9o?4C&0+ z3Cc=mhID4^G;yVk{s=l#iluaB?2uqeXNFXiPcM(p+?lbyf8xG5CmrdaxSNAw=3{E9 z`lZ-iO+Qx*Pc{YnP!(%t^x{zUYDg{7r8~#FY86Ce S(Trx%jQ<63B6;BU&H(_01)C@U literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/profiles/mock_orchestrator_publication.xml b/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/profiles/mock_orchestrator_publication.xml new file mode 100644 index 000000000..b47d99b92 --- /dev/null +++ b/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/profiles/mock_orchestrator_publication.xml @@ -0,0 +1,24 @@ + +
+ + + + + +
+ + + + + + + + + + + + + + SECURITY_PARAMETERS + +
\ No newline at end of file diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/resources/log4j.properties b/dhp-workflows/dhp-dedup-openaire/src/test/resources/log4j.properties index ce37270c6..d3e717dfa 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/resources/log4j.properties +++ b/dhp-workflows/dhp-dedup-openaire/src/test/resources/log4j.properties @@ -19,7 +19,29 @@ log4j.logger.org.eclipse.jetty=WARN log4j.logger.org.apache.hadoop.mapreduce.lib.output.FileOutputCommitterFactory=WARN log4j.logger.org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter=WARN -log4j.logger.org.apache.parquet.hadoop.ParquetOutputFormat=WARN -log4j.logger.org.apache.parquet.hadoop.InternalParquetRecordWriter=WARN +#log4j.logger.org.apache.parquet.hadoop.ParquetOutputFormat=WARN +#log4j.logger.org.apache.parquet.hadoop.InternalParquetRecordWriter=WARN log4j.logger.org.apache.hadoop.io.compress.CodecPool=WARN -log4j.logger.org.apache.parquet.hadoop.codec.CodecConfig=WARN \ No newline at end of file +#log4j.logger.org.apache.hadoop.io.compress=WARN +#log4j.logger.org.apache.parquet.hadoop.codec.CodecConfig=WARN +log4j.logger.parquet.hadoop.ColumnChunkPageWriteStore=ERROR +log4j.logger.com.jayway.jsonpath.internal.path.CompiledPath=WARN +log4j.logger.org.apache.parquet.hadoop.ParquetRecordReader=ERROR +log4j.logger.parquet.hadoop=WARN +log4j.logger.org.eclipse.jetty.server.handler.ContextHandlerCollection=WARN +log4j.logger.org.spark_project.jetty.util.component.ContainerLifeCycle=WARN +log4j.logger.org.apache.hadoop.mapred.FileInputFormat=WARN +log4j.logger.org.spark_project.jetty.servlet.ServletHandler=WARN +log4j.logger.org.apache.commons.beanutils.converters.BooleanConverter=WARN +log4j.logger.org.apache.commons.beanutils.converters.StringConverter=WARN +log4j.logger.org.apache.commons.beanutils.converters.LongConverter=WARN +log4j.logger.org.apache.commons.beanutils.converters.ArrayConverter=WARN 
+log4j.logger.org.apache.commons.beanutils.converters.FloatConverter=WARN +log4j.logger.org.apache.commons.beanutils.converters.IntegerConverter=WARN +log4j.logger.org.apache.commons.beanutils.converters.DoubleConverter=WARN +log4j.logger.org.apache.commons.beanutils.converters.CharacterConverter=WARN +log4j.logger.org.apache.commons.beanutils.converters.ByteConverter=WARN +log4j.logger.org.apache.commons.beanutils.converters.BigIntegerConverter=WARN +log4j.logger.org.apache.commons.beanutils.converters.BigDecimalConverter=WARN +log4j.logger.org.apache.commons.beanutils.converters.ShortConverter=WARN +log4j.logger.org.apache.commons.beanutils.BeanUtils=WARN From 3dbc637d3e8e8a5a8c775821998b1991e10690bc Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 17 Nov 2022 09:55:41 +0100 Subject: [PATCH 22/55] code formatting --- .../oa/dedup/SparkDedupPublicationTest.java | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupPublicationTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupPublicationTest.java index c657d1865..773de65fa 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupPublicationTest.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupPublicationTest.java @@ -15,7 +15,6 @@ import java.nio.file.Paths; import java.util.*; import java.util.stream.Collectors; -import com.fasterxml.jackson.core.JsonProcessingException; import org.apache.commons.io.FileUtils; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; @@ -32,6 +31,7 @@ import org.mockito.Mock; import org.mockito.Mockito; import org.mockito.junit.jupiter.MockitoExtension; +import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.DeserializationFeature; import com.fasterxml.jackson.databind.ObjectMapper; 
import com.google.common.collect.Sets; @@ -299,17 +299,18 @@ public class SparkDedupPublicationTest implements Serializable { assertEquals("Article", instance_cr.get().getInstancetype().getClassname()); } - private void verifyRoot_case_2(Dataset roots, Dataset pubs) throws JsonProcessingException { + private void verifyRoot_case_2(Dataset roots, Dataset pubs) + throws JsonProcessingException { Publication root = roots .filter("id = '50|doi_dedup___::18aff3b55fb6876466a5d4bd82434885'") .first(); assertNotNull(root); Publication crossref_duplicate = pubs - .filter("id = '50|doi_________::18aff3b55fb6876466a5d4bd82434885'") - .first(); + .filter("id = '50|doi_________::18aff3b55fb6876466a5d4bd82434885'") + .first(); - //System.err.println(new ObjectMapper().writeValueAsString(root)); + // System.err.println(new ObjectMapper().writeValueAsString(root)); assertEquals(crossref_duplicate.getJournal().getName(), root.getJournal().getName()); assertEquals(crossref_duplicate.getJournal().getIssnOnline(), root.getJournal().getIssnOnline()); @@ -325,10 +326,10 @@ public class SparkDedupPublicationTest implements Serializable { .collect(Collectors.toCollection(HashSet::new)); Set root_cf = root - .getCollectedfrom() - .stream() - .map(KeyValue::getValue) - .collect(Collectors.toCollection(HashSet::new)); + .getCollectedfrom() + .stream() + .map(KeyValue::getValue) + .collect(Collectors.toCollection(HashSet::new)); assertTrue(Sets.difference(root_cf, dups_cf).isEmpty()); } From 0aa725083f66875b4eef7ad6513b24bf557eb5ec Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 17 Nov 2022 16:13:43 +0100 Subject: [PATCH 23/55] extended dedup testing --- .../dhp/oa/dedup/DedupRecordFactory.java | 2 +- ...st.java => SparkPublicationRootsTest.java} | 283 +++++++++--------- .../oa/dedup/SparkPublicationRootsTest2.java | 251 ++++++++++++++++ .../entities2/publication/publication.gz | Bin 9056 -> 0 bytes .../alterations/publication/publication_1.gz | Bin 0 -> 1488 bytes 
.../entities/publication/publication_0.gz | Bin 0 -> 10874 bytes 6 files changed, 400 insertions(+), 136 deletions(-) rename dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/{SparkDedupPublicationTest.java => SparkPublicationRootsTest.java} (57%) create mode 100644 dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkPublicationRootsTest2.java delete mode 100644 dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/entities2/publication/publication.gz create mode 100644 dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/root/alterations/publication/publication_1.gz create mode 100644 dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/root/entities/publication/publication_0.gz diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java index f9fc8a21a..82bf87cca 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java @@ -112,7 +112,7 @@ public class DedupRecordFactory { // set authors and date if (ModelSupport.isSubClass(entity, Result.class)) { - ((Result) entity).setDateofacceptance(DatePicker.pick(dates)); + // ((Result) entity).setDateofacceptance(DatePicker.pick(dates)); ((Result) entity).setAuthor(AuthorMerger.merge(authors)); } diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupPublicationTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkPublicationRootsTest.java similarity index 57% rename from dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupPublicationTest.java rename to 
dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkPublicationRootsTest.java index 773de65fa..3cff836eb 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupPublicationTest.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkPublicationRootsTest.java @@ -11,16 +11,17 @@ import java.io.File; import java.io.IOException; import java.io.Serializable; import java.net.URISyntaxException; +import java.nio.file.Path; import java.nio.file.Paths; import java.util.*; import java.util.stream.Collectors; +import org.apache.commons.cli.ParseException; import org.apache.commons.io.FileUtils; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.function.FilterFunction; -import org.apache.spark.api.java.function.FlatMapFunction; import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; @@ -31,7 +32,6 @@ import org.mockito.Mock; import org.mockito.Mockito; import org.mockito.junit.jupiter.MockitoExtension; -import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.DeserializationFeature; import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.Sets; @@ -44,48 +44,52 @@ import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; @ExtendWith(MockitoExtension.class) @TestMethodOrder(MethodOrderer.OrderAnnotation.class) -public class SparkDedupPublicationTest implements Serializable { +public class SparkPublicationRootsTest implements Serializable { @Mock(serializable = true) ISLookUpService isLookUpService; private static SparkSession spark; - private static JavaSparkContext jsc; + private static String workingPath; - private static String testGraphBasePath; - private static String testOutputBasePath; - private static String testDedupGraphBasePath; + 
private static String graphInputPath; + private static String graphOutputPath; private static final String testActionSetId = "test-orchestrator"; + private static Path testBaseTmpPath; + + private static final ObjectMapper MAPPER = new ObjectMapper() + .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); + @BeforeAll - public static void cleanUp() throws IOException, URISyntaxException { + public static void init() throws IOException, URISyntaxException { - testGraphBasePath = Paths - .get(SparkDedupPublicationTest.class.getResource("/eu/dnetlib/dhp/dedup/entities2").toURI()) - .toFile() - .getAbsolutePath(); - testOutputBasePath = createTempDirectory(SparkDedupPublicationTest.class.getSimpleName() + "-") - .toAbsolutePath() - .toString(); + testBaseTmpPath = createTempDirectory(SparkPublicationRootsTest.class.getSimpleName() + "-"); - testDedupGraphBasePath = createTempDirectory(SparkDedupPublicationTest.class.getSimpleName() + "-") - .toAbsolutePath() - .toString(); + final File entitiesSources = Paths + .get(SparkPublicationRootsTest.class.getResource("/eu/dnetlib/dhp/dedup/root").toURI()) + .toFile(); - FileUtils.deleteDirectory(new File(testOutputBasePath)); - FileUtils.deleteDirectory(new File(testDedupGraphBasePath)); + FileUtils + .copyDirectory( + entitiesSources, + testBaseTmpPath.resolve("input").toFile()); + + workingPath = testBaseTmpPath.resolve("workingPath").toString(); + graphInputPath = testBaseTmpPath.resolve("input").resolve("entities").toString(); + graphOutputPath = testBaseTmpPath.resolve("output").toString(); + + FileUtils.deleteDirectory(new File(workingPath)); + FileUtils.deleteDirectory(new File(graphOutputPath)); final SparkConf conf = new SparkConf(); conf.set("spark.sql.shuffle.partitions", "10"); spark = SparkSession .builder() - .appName(SparkDedupPublicationTest.class.getSimpleName()) + .appName(SparkPublicationRootsTest.class.getSimpleName()) .master("local[*]") .config(conf) .getOrCreate(); - - jsc = 
JavaSparkContext.fromSparkContext(spark.sparkContext()); - } @BeforeEach @@ -100,55 +104,51 @@ public class SparkDedupPublicationTest implements Serializable { .thenReturn(classPathResourceAsString("/eu/dnetlib/dhp/dedup/conf/pub.curr.conf.json")); } + @AfterAll + public static void tearDown() throws IOException { + FileUtils.deleteDirectory(testBaseTmpPath.toFile()); + spark.close(); + } + @Test @Order(1) void createSimRelsTest() throws Exception { - - ArgumentApplicationParser parser = new ArgumentApplicationParser( - classPathResourceAsString("/eu/dnetlib/dhp/oa/dedup/createSimRels_parameters.json")); - - parser - .parseArgument( - new String[] { - "--graphBasePath", testGraphBasePath, - "--actionSetId", testActionSetId, - "--isLookUpUrl", "lookupurl", - "--workingPath", testOutputBasePath, - "--numPartitions", "5" - }); - - new SparkCreateSimRels(parser, spark).run(isLookUpService); + new SparkCreateSimRels(args( + "/eu/dnetlib/dhp/oa/dedup/createSimRels_parameters.json", + new String[] { + "--graphBasePath", graphInputPath, + "--actionSetId", testActionSetId, + "--isLookUpUrl", "lookupurl", + "--workingPath", workingPath, + "--numPartitions", "5" + }), spark) + .run(isLookUpService); long pubs_simrel = spark .read() - .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "publication")) + .load(DedupUtility.createSimRelPath(workingPath, testActionSetId, "publication")) .count(); - assertEquals(62, pubs_simrel); + assertEquals(74, pubs_simrel); } @Test @Order(2) void cutMergeRelsTest() throws Exception { - - ArgumentApplicationParser parser = new ArgumentApplicationParser( - classPathResourceAsString("/eu/dnetlib/dhp/oa/dedup/createCC_parameters.json")); - - parser - .parseArgument( - new String[] { - "--graphBasePath", testGraphBasePath, - "--actionSetId", testActionSetId, - "--isLookUpUrl", "lookupurl", - "--workingPath", testOutputBasePath, - "--cutConnectedComponent", "3" - }); - - new SparkCreateMergeRels(parser, 
spark).run(isLookUpService); + new SparkCreateMergeRels(args( + "/eu/dnetlib/dhp/oa/dedup/createCC_parameters.json", + new String[] { + "--graphBasePath", graphInputPath, + "--actionSetId", testActionSetId, + "--isLookUpUrl", "lookupurl", + "--workingPath", workingPath, + "--cutConnectedComponent", "3" + }), spark) + .run(isLookUpService); long pubs_mergerel = spark .read() - .load(testOutputBasePath + "/" + testActionSetId + "/publication_mergerel") + .load(workingPath + "/" + testActionSetId + "/publication_mergerel") .as(Encoders.bean(Relation.class)) .filter((FilterFunction) r -> r.getRelClass().equalsIgnoreCase("merges")) .groupBy("source") @@ -159,49 +159,44 @@ public class SparkDedupPublicationTest implements Serializable { assertEquals(0, pubs_mergerel); - FileUtils.deleteDirectory(new File(testOutputBasePath + "/" + testActionSetId + "/publication_mergerel")); + FileUtils.deleteDirectory(new File(workingPath + "/" + testActionSetId + "/publication_mergerel")); } @Test @Order(3) void createMergeRelsTest() throws Exception { + new SparkCreateMergeRels(args( + "/eu/dnetlib/dhp/oa/dedup/createCC_parameters.json", + new String[] { + "--graphBasePath", graphInputPath, + "--actionSetId", testActionSetId, + "--isLookUpUrl", "lookupurl", + "--workingPath", workingPath + }), spark) + .run(isLookUpService); - ArgumentApplicationParser parser = new ArgumentApplicationParser( - classPathResourceAsString("/eu/dnetlib/dhp/oa/dedup/createCC_parameters.json")); - - parser - .parseArgument( - new String[] { - "--graphBasePath", testGraphBasePath, - "--actionSetId", testActionSetId, - "--isLookUpUrl", "lookupurl", - "--workingPath", testOutputBasePath - }); - - new SparkCreateMergeRels(parser, spark).run(isLookUpService); - - final Dataset pubs = spark + final Dataset merges = spark .read() - .load(testOutputBasePath + "/" + testActionSetId + "/publication_mergerel") + .load(workingPath + "/" + testActionSetId + "/publication_mergerel") .as(Encoders.bean(Relation.class)); - 
final List merges = pubs + final List mergeList = merges .filter("source == '50|doi_dedup___::d5021b53204e4fdeab6ff5d5bc468032'") .collectAsList(); - assertEquals(3, merges.size()); + assertEquals(3, mergeList.size()); Set dups = Sets .newHashSet( "50|doi_________::3b1d0d8e8f930826665df9d6b82fbb73", "50|doi_________::d5021b53204e4fdeab6ff5d5bc468032", "50|arXiv_______::c93aeb433eb90ed7a86e29be00791b7c"); - merges.forEach(r -> { + mergeList.forEach(r -> { assertEquals(ModelConstants.RESULT_RESULT, r.getRelType()); assertEquals(ModelConstants.DEDUP, r.getSubRelType()); assertEquals(ModelConstants.MERGES, r.getRelClass()); assertTrue(dups.contains(r.getTarget())); }); - final List mergedIn = pubs + final List mergedIn = merges .filter("target == '50|doi_dedup___::d5021b53204e4fdeab6ff5d5bc468032'") .collectAsList(); assertEquals(3, mergedIn.size()); @@ -212,47 +207,37 @@ public class SparkDedupPublicationTest implements Serializable { assertTrue(dups.contains(r.getSource())); }); - assertEquals(24, pubs.count()); + assertEquals(32, merges.count()); } @Test @Order(4) void createDedupRecordTest() throws Exception { - - ArgumentApplicationParser parser = new ArgumentApplicationParser( - classPathResourceAsString("/eu/dnetlib/dhp/oa/dedup/createDedupRecord_parameters.json")); - parser - .parseArgument( - new String[] { - "--graphBasePath", testGraphBasePath, - "--actionSetId", testActionSetId, - "--isLookUpUrl", "lookupurl", - "--workingPath", testOutputBasePath - }); - - new SparkCreateDedupRecord(parser, spark).run(isLookUpService); - - final ObjectMapper mapper = new ObjectMapper() - .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); + new SparkCreateDedupRecord(args( + "/eu/dnetlib/dhp/oa/dedup/createDedupRecord_parameters.json", + new String[] { + "--graphBasePath", graphInputPath, + "--actionSetId", testActionSetId, + "--isLookUpUrl", "lookupurl", + "--workingPath", workingPath + }), spark) + .run(isLookUpService); final Dataset roots = spark 
.read() - .textFile(testOutputBasePath + "/" + testActionSetId + "/publication_deduprecord") - .map( - (MapFunction) value -> mapper.readValue(value, Publication.class), - Encoders.bean(Publication.class)); + .textFile(workingPath + "/" + testActionSetId + "/publication_deduprecord") + .map(asEntity(Publication.class), Encoders.bean(Publication.class)); - assertEquals(2, roots.count()); + assertEquals(3, roots.count()); final Dataset pubs = spark .read() - .textFile(DedupUtility.createEntityPath(testGraphBasePath, "publication")) - .map( - (MapFunction) value -> mapper.readValue(value, Publication.class), - Encoders.bean(Publication.class)); + .textFile(DedupUtility.createEntityPath(graphInputPath, "publication")) + .map(asEntity(Publication.class), Encoders.bean(Publication.class)); verifyRoot_case_1(roots, pubs); verifyRoot_case_2(roots, pubs); + verifyRoot_case_3(roots, pubs); } private static void verifyRoot_case_1(Dataset roots, Dataset pubs) { @@ -299,8 +284,7 @@ public class SparkDedupPublicationTest implements Serializable { assertEquals("Article", instance_cr.get().getInstancetype().getClassname()); } - private void verifyRoot_case_2(Dataset roots, Dataset pubs) - throws JsonProcessingException { + private void verifyRoot_case_2(Dataset roots, Dataset pubs) { Publication root = roots .filter("id = '50|doi_dedup___::18aff3b55fb6876466a5d4bd82434885'") .first(); @@ -334,57 +318,86 @@ public class SparkDedupPublicationTest implements Serializable { assertTrue(Sets.difference(root_cf, dups_cf).isEmpty()); } + private void verifyRoot_case_3(Dataset roots, Dataset pubs) { + Publication root = roots + .filter("id = '50|dedup_wf_001::31ca734cc22181b704c4aa8fd050062a'") + .first(); + assertNotNull(root); + + Publication pivot_duplicate = pubs + .filter("id = '50|od_______166::31ca734cc22181b704c4aa8fd050062a'") + .first(); + + assertEquals(pivot_duplicate.getPublisher().getValue(), root.getPublisher().getValue()); + + Set dups_cf = pubs + .collectAsList() + 
.stream() + .flatMap(p -> p.getCollectedfrom().stream()) + .map(KeyValue::getValue) + .collect(Collectors.toCollection(HashSet::new)); + + Set root_cf = root + .getCollectedfrom() + .stream() + .map(KeyValue::getValue) + .collect(Collectors.toCollection(HashSet::new)); + + assertTrue(Sets.difference(root_cf, dups_cf).isEmpty()); + } + @Test @Order(6) void updateEntityTest() throws Exception { + new SparkUpdateEntity(args( + "/eu/dnetlib/dhp/oa/dedup/updateEntity_parameters.json", + new String[] { + "--graphBasePath", graphInputPath, + "--workingPath", workingPath, + "--dedupGraphPath", graphOutputPath + }), spark) + .run(isLookUpService); - ArgumentApplicationParser parser = new ArgumentApplicationParser( - classPathResourceAsString("/eu/dnetlib/dhp/oa/dedup/updateEntity_parameters.json")); - parser - .parseArgument( - new String[] { - "-i", testGraphBasePath, "-w", testOutputBasePath, "-o", testDedupGraphBasePath - }); - - new SparkUpdateEntity(parser, spark).run(isLookUpService); - - long publications = jsc.textFile(testDedupGraphBasePath + "/publication").count(); + long publications = spark.read().textFile(graphOutputPath + "/publication").count(); long mergedPubs = spark .read() - .load(testOutputBasePath + "/" + testActionSetId + "/publication_mergerel") + .load(workingPath + "/" + testActionSetId + "/publication_mergerel") .as(Encoders.bean(Relation.class)) .where("relClass=='merges'") - .javaRDD() - .map(Relation::getTarget) + .map((MapFunction) Relation::getTarget, Encoders.STRING()) .distinct() .count(); - assertEquals(14, publications); + assertEquals(19, publications); // 16 originals + 3 roots - long deletedPubs = jsc - .textFile(testDedupGraphBasePath + "/publication") - .filter(this::isDeletedByInference) + long deletedPubs = spark + .read() + .textFile(graphOutputPath + "/publication") + .map(asEntity(Publication.class), Encoders.bean(Publication.class)) + .filter("datainfo.deletedbyinference == true") + .map((MapFunction) OafEntity::getId, 
Encoders.STRING()) + .distinct() .count(); assertEquals(mergedPubs, deletedPubs); } - @AfterAll - public static void finalCleanUp() throws IOException { - FileUtils.deleteDirectory(new File(testOutputBasePath)); - FileUtils.deleteDirectory(new File(testDedupGraphBasePath)); - } - - public boolean isDeletedByInference(String s) { - return s.contains("\"deletedbyinference\":true"); - } - private static String classPathResourceAsString(String path) throws IOException { return IOUtils .toString( - SparkDedupPublicationTest.class + SparkPublicationRootsTest.class .getResourceAsStream(path)); } + private static MapFunction asEntity(Class clazz) { + return value -> MAPPER.readValue(value, clazz); + } + + private ArgumentApplicationParser args(String paramSpecs, String[] args) throws IOException, ParseException { + ArgumentApplicationParser parser = new ArgumentApplicationParser(classPathResourceAsString(paramSpecs)); + parser.parseArgument(args); + return parser; + } + } diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkPublicationRootsTest2.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkPublicationRootsTest2.java new file mode 100644 index 000000000..9afe1e34b --- /dev/null +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkPublicationRootsTest2.java @@ -0,0 +1,251 @@ + +package eu.dnetlib.dhp.oa.dedup; + +import static java.nio.file.Files.createTempDirectory; + +import static org.apache.spark.sql.functions.count; +import static org.junit.jupiter.api.Assertions.*; +import static org.mockito.Mockito.lenient; + +import java.io.File; +import java.io.IOException; +import java.io.Serializable; +import java.net.URISyntaxException; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.HashSet; +import java.util.List; +import java.util.Optional; +import java.util.Set; +import java.util.stream.Collectors; + +import org.apache.commons.cli.ParseException; 
+import org.apache.commons.io.FileUtils; +import org.apache.commons.io.IOUtils; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.api.java.function.FilterFunction; +import org.apache.spark.api.java.function.MapFunction; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.SparkSession; +import org.junit.jupiter.api.*; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.Mock; +import org.mockito.Mockito; +import org.mockito.junit.jupiter.MockitoExtension; + +import com.fasterxml.jackson.databind.DeserializationFeature; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.collect.Sets; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; +import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; + +@ExtendWith(MockitoExtension.class) +@TestMethodOrder(MethodOrderer.OrderAnnotation.class) +public class SparkPublicationRootsTest2 implements Serializable { + + @Mock(serializable = true) + ISLookUpService isLookUpService; + private static SparkSession spark; + + private static String workingPath; + + private static String graphInputPath; + + private static String graphOutputPath; + + private static final String testActionSetId = "test-orchestrator"; + + private static Path testBaseTmpPath; + + private static final ObjectMapper MAPPER = new ObjectMapper() + .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); + + @BeforeAll + public static void init() throws IOException, URISyntaxException { + + testBaseTmpPath = createTempDirectory(SparkPublicationRootsTest2.class.getSimpleName() + "-"); + + final File entitiesSources = Paths + .get(SparkPublicationRootsTest2.class.getResource("/eu/dnetlib/dhp/dedup/root").toURI()) + .toFile(); + + 
FileUtils + .copyDirectory( + entitiesSources, + testBaseTmpPath.resolve("input").toFile()); + + FileUtils + .copyFileToDirectory( + Paths + .get( + SparkPublicationRootsTest2.class + .getResource( + "/eu/dnetlib/dhp/dedup/root/alterations/publication/publication_1.gz") + .toURI()) + .toFile(), + testBaseTmpPath.resolve("input").resolve("entities").resolve("publication").toFile()); + + workingPath = testBaseTmpPath.resolve("workingPath").toString(); + graphInputPath = testBaseTmpPath.resolve("input").resolve("entities").toString(); + graphOutputPath = testBaseTmpPath.resolve("output").toString(); + + final SparkConf conf = new SparkConf(); + conf.set("spark.sql.shuffle.partitions", "10"); + spark = SparkSession + .builder() + .appName(SparkPublicationRootsTest2.class.getSimpleName()) + .master("local[*]") + .config(conf) + .getOrCreate(); + } + + @BeforeEach + public void setUp() throws IOException, ISLookUpException { + + lenient() + .when(isLookUpService.getResourceProfileByQuery(Mockito.contains(testActionSetId))) + .thenReturn(classPathResourceAsString("/eu/dnetlib/dhp/dedup/profiles/mock_orchestrator_publication.xml")); + + lenient() + .when(isLookUpService.getResourceProfileByQuery(Mockito.contains("publication"))) + .thenReturn(classPathResourceAsString("/eu/dnetlib/dhp/dedup/conf/pub.curr.conf.json")); + } + + @AfterAll + public static void tearDown() throws IOException { + FileUtils.deleteDirectory(testBaseTmpPath.toFile()); + } + + @Test + @Order(7) + void dedupAlteredDatasetTest() throws Exception { + + new SparkCreateSimRels(args( + "/eu/dnetlib/dhp/oa/dedup/createSimRels_parameters.json", + new String[] { + "--graphBasePath", graphInputPath, + "--actionSetId", testActionSetId, + "--isLookUpUrl", "lookupurl", + "--workingPath", workingPath, + "--numPartitions", "5" + }), spark) + .run(isLookUpService); + + new SparkCreateMergeRels(args( + "/eu/dnetlib/dhp/oa/dedup/createCC_parameters.json", + new String[] { + "--graphBasePath", graphInputPath, + 
"--actionSetId", testActionSetId, + "--isLookUpUrl", "lookupurl", + "--workingPath", workingPath + }), spark) + .run(isLookUpService); + + final Dataset merges = spark + .read() + .load(workingPath + "/" + testActionSetId + "/publication_mergerel") + .as(Encoders.bean(Relation.class)); + + assertEquals( + 3, merges + .filter("relclass == 'isMergedIn'") + .map((MapFunction) Relation::getTarget, Encoders.STRING()) + .distinct() + .count()); + assertEquals( + 4, merges + .filter("source == '50|doi_dedup___::b3aec7985136e36827176aaa1dd5082d'") + .count()); + + new SparkCreateDedupRecord(args( + "/eu/dnetlib/dhp/oa/dedup/createDedupRecord_parameters.json", + new String[] { + "--graphBasePath", graphInputPath, + "--actionSetId", testActionSetId, + "--isLookUpUrl", "lookupurl", + "--workingPath", workingPath + }), spark) + .run(isLookUpService); + + final Dataset roots = spark + .read() + .textFile(workingPath + "/" + testActionSetId + "/publication_deduprecord") + .map(asEntity(Publication.class), Encoders.bean(Publication.class)); + + assertEquals(3, roots.count()); + + final Dataset pubs = spark + .read() + .textFile(DedupUtility.createEntityPath(graphInputPath, "publication")) + .map(asEntity(Publication.class), Encoders.bean(Publication.class)); + + Publication root = roots + .filter("id = '50|doi_dedup___::b3aec7985136e36827176aaa1dd5082d'") + .first(); + assertNotNull(root); + + Publication crossref_duplicate = pubs + .filter("id = '50|doi_________::b3aec7985136e36827176aaa1dd5082d'") + .collectAsList() + .get(0); + + assertEquals(crossref_duplicate.getDateofacceptance().getValue(), root.getDateofacceptance().getValue()); + assertEquals(crossref_duplicate.getJournal().getName(), root.getJournal().getName()); + assertEquals(crossref_duplicate.getJournal().getIssnPrinted(), root.getJournal().getIssnPrinted()); + assertEquals(crossref_duplicate.getPublisher().getValue(), root.getPublisher().getValue()); + + Set rootPids = root + .getPid() + .stream() + 
.map(StructuredProperty::getValue) + .collect(Collectors.toCollection(HashSet::new)); + Set dupPids = crossref_duplicate + .getPid() + .stream() + .map(StructuredProperty::getValue) + .collect(Collectors.toCollection(HashSet::new)); + + assertFalse(Sets.intersection(rootPids, dupPids).isEmpty()); + assertTrue(rootPids.contains("10.1109/jstqe.2022.3205716")); + assertTrue(rootPids.contains("10.1109/jstqe.2023.9999999")); + + Optional instance_cr = root + .getInstance() + .stream() + .filter(i -> i.getCollectedfrom().getValue().equals("Crossref")) + .findFirst(); + assertTrue(instance_cr.isPresent()); + assertEquals("OPEN", instance_cr.get().getAccessright().getClassid()); + assertEquals("Open Access", instance_cr.get().getAccessright().getClassname()); + assertEquals(OpenAccessRoute.hybrid, instance_cr.get().getAccessright().getOpenAccessRoute()); + assertEquals( + "IEEE Journal of Selected Topics in Quantum Electronics", instance_cr.get().getHostedby().getValue()); + assertEquals("0001", instance_cr.get().getInstancetype().getClassid()); + assertEquals("Article", instance_cr.get().getInstancetype().getClassname()); + + } + + private static String classPathResourceAsString(String path) throws IOException { + return IOUtils + .toString( + SparkPublicationRootsTest2.class + .getResourceAsStream(path)); + } + + private static MapFunction asEntity(Class clazz) { + return value -> MAPPER.readValue(value, clazz); + } + + private ArgumentApplicationParser args(String paramSpecs, String[] args) throws IOException, ParseException { + ArgumentApplicationParser parser = new ArgumentApplicationParser(classPathResourceAsString(paramSpecs)); + parser.parseArgument(args); + return parser; + } + +} diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/entities2/publication/publication.gz b/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/entities2/publication/publication.gz deleted file mode 100644 index 
96dd218174dbb1fa6ab55da289dc116bbceb9963..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 9056 zcmV-mBcI$KiwFp`FK%N118{X>Y-wX*bZKvHE_8Tw0PTJ2a@$Cf?*H=?6^_^mIffV9 zZ@oPy)Uw*G9b3}MvU|_YnNSpff(RP~H~=A8?d^Bp%qoxsNRXl=Qlg~lu-g(*tV?EA zR#xVhi63u9rJuybfaPyI^5e}FO-cqoy`lMk#aD0s?2tDh9n+&U%J}m*y^4$2Pq0ly zlY&8mQJ!6~l%@fr!8p!R-Y`gLQN$s3RooQuaFpd^PdqQ!_zirGw@B%b5BD3*uUIi= zAwDUB0UI*$_$PxyF`#1a< zgAU*!e2?MxeV7;x0#+1xd^s4;FYoj5-;YmzKVDc;nvDs)qH#izTy4t|on8#HkS$wL zqX|wXq3LBwFWLMo$~0VHrYQ4C%ad!C zc8}S$;L7CFcQm>Q`KTj4;6MHeKjt+320yFVSro-dOe=ZePopwP7CY^bswqp;*Y*MR+Xv*`b%^L>VXQJA z(=e-#wx5o(G|t+`Gu9YSblD-3j{fA|XGK=#0UO_p76faVc-KB(kSkOf2l1F)Ogdg7 zUyJQf&NA3$aY~aTzP>lA(lZ>>RQfdk9$)n|O>ugPr5pB}fAXDM1bIA~thc7Yz5plV zfF|7odKHgv$fq)oXu!IM!+1Q7(@XM!7A!Bw-j{<8B#SMphmKM$hm-h;U31$KogGH z5JWmG85w8TVVsk+9I`ws3mB`2bJQ02oe@qS1}@5AKxjdd?3#?SYnJmVltaGc4xcaq z^-v~av|_|q*nu{~5op^K2JR4=hJ?XlalS*EmOzGanZty7M9eC0shB>u)=)c6}N5WYM@1rLJ`ao+r)A7@E+iL4OR z!aePmpd81fOvX8lD2!N?eit-%RxnszNOIT^#ZNE^AWIg3PR8CZa|Yi5+Q1@%vET`= zzsna5gMoJLf`kDD!zN?)aDuGs0};Zv6OLqg(IaOpVCk4#XZg2+4CocCWsqH`eEB7i z4F8?1F1!Khje`Q{G@%DhVMND3wiFg0WS42o*?=UEhoCy^?1vv=YQq^%MGM>o1kRi+ z#73N1;U()vu-Fnr5_LEiw;tIKL*6}2lAF$C3VhOVj%CDWEX7HOI2BkG2r>+kW@!Q^ z0I(MhIoN<>KL4qx0?Sx_mBl%inUe2}y|aV2Fu9Rv&jky3b`!INPmz5OSdt*w;9wkc zvEtH_j|qw`6_Y(5fD|EJQLJ*1P~1j*b>E>;%zK7)!1{)KPa(&r+boK65Q;Ih;=iWs z8pS?fqcM&~$@{?{UHryu9TO%Av4nrdeC)kX$H#9$+~Ne79FKwQ1bG2_jFYdDNSrML zQlOOrd z*NyU+r6GPBMghL01uPeqK$G|nG0@&}+Tp5Tb9Q0iE?AIB1Z?x6(+qS|z?YDvNU^t_ z`K`M5{AM;$W0R9YdYEU`$to*^n@iaJ;_uB7IKM76@H51OIQJDt|L&^VBm#(2%r}nSVbj2 zM~8=pV8Eayk>Gy$1Uo-K!9LGMmHztA5~QXal0$rwisdFE=N+(&WfAPYw3u#c{F< zMO{q(^E};GLnU;W6S=zLXqIJ}VdRFE?`VRt2OpOh449Wh$y0!TK!QP^oR2 zx@!hb@TN}e&*`@mEu=FzEsJ=Z`GV*Y5{txxvW-X96CAy)3`% zS6?Xl{!QQLDgA_9(gchkvEj=+;nx48hwyt3IKjWT`a&Q#>T}x>{j+i!fSLg#0-1J;y}+%LakWbt(`-ChZY}UJ zZZQ^kn})Orfw4fp3xjIu{K^r2d2x2N&$lx-=k~cZWT`Q`)l#;R`As(4iur-nS+C8>*5(N=+~1^9yzF- 
zEW+hhyR{cVj0RC5*SFK_JLS>{hUd#&&p-eCKR@P=gjahznM74r#hf1BqOD&Xck~Dw6-Pw}m22sW;#<^W zdZ;;?PeWY|e2Zxfqw*lnKxkMbD6*_e2d?dw64fBWNX2y4Nmeav8OJftj+!zu_ZEKnf`(C1j(uQAF8%4#@*PE^Y>EJe`_MYB1a zp@uoB0Uqj*1_2KtO&H1@@F^2e3a7jD9^URf%<=$^DbNNO3~oO}&RD@{9t36ntb!{Y#-m`>@qIK`OTBDQ{4hTcae|t zJ6%AsNCWt_J|Jv@0+ajv8SitwYZ$KHbq&p0ln;)gBU6OlTByAVR8LC9GnNVsq{48z zuA&=K`#)pt-{9l??yeB4q=t%J8!I~j$cih-Gl&Tb;m-p2%iI}_2>$bP#M~Muncm=AF7R8pzpn~luj2y3 zX^7v>vT`iM;l|J7b{+RZ0^3W2V=5>q^;|JL)Alswzu!~}s^Qjc!qK@7;~x9TZ(qg6 z6bBQm>J)c4*FL~@+wN+X@?Ui&cF*060+A(r=3F}zBR&E@I$+cjjwsesjbe$8Rutk60PZs!y&3I)6BV;>7F}l!xxD= z{rzFM)7#=k_pxI08Eo${E!d^8pmt+++G^Gt4j@e5MRUG!-MKBg^!OP~?f1&(&bKhX z&u1SdWBi-G|8Lr_@P~azLFx(EC5!?eLI3>FM=d>Zzt>@DXgwyF4;I#T>ddP4I0-yX zdhF4w)2uD>w^qz5%xEqSAuBRmO^XcEv>eq^sS|{eK|@V*-N*_xyVECik8_I=>?-Qnof6le2E;5JB? zwBo>8986LYUX&zMAxjW|wc?eQsyVu9+6qtizCB*qlG@&RaKLhYki|sy<{0;gmCO&( zNL_FCem8gfLdWwt>T{iM!PBQL$YGkLr*H{9;|{weTXEwVIh#NII!!Me*c?+6piljx zzgF^$)NjwmPz^CQfrctjB*}tls@v3>OJOG-)vdV6t#a|EE@fan8Ca5Ua;txLE4zIc zFn_Cky29zyn8Yba%2L?emYuro_oxNG?ky>#q6P0$cid32go`ZU zB1^c)5-zfYi!9;tEK9h^6aS7U{y{dx9qN%st$EW7(eFy?%-R9>Wx;XQ0?<2z%^w#7 z85c*F$kG5ther#taFU>+I(SJ7!Puv8Npnr#vF*qWG{taT&2)X=v4Xj%xV%?NZ}Q>J!YOu*cTb@M24K3j?El}U zA8!mi>)1x$P#sk_w}Yx!29-eErgN_7p6+^v(=$wKB~}9Oe&{gjrCHbuX(^W!)`4?`0IHQGcch;GecJ~l*n?F(6LknezRRh z)n-MbO~Z1cNDo5GfOEz$R7JC0M^s4KENVwe z+)m>)-m?_$$;mxDE^%BeEa<2rD)Da!YQ+7cv%@E3Y~7k$40aZ*u!_@krdiieXu79gvQ#Yo_CL>L8uxk`5MT z^eYvKFTikHu@&7>6<DNt=%|c0}`h7|xeF@anTo@$vK7v3L9n{Qcm4KJwtEiyOFG-@w~(Lcb#RwqAl-?m!#90(G{L zy?dD@uQzwM#RqgJoR{cBH9b}HG`;6;3+DA2_ttL}YKwwte#tNbK(At&Vh|r%q<>Jd z_uJ|I(OZn;UdIVs&te1v#E9h>(yv)g*d$vcj0=QH3CIQ|0WAe?Do(M40S5hth`8Xe zsE~uG2y71@o_#9>isXQoI3Rxc#~(Bg$=)xpdolz(vFt4w(1N2oQs5vHSg&A!3+J$w z;9q6TyF@4lQYV;#i4a-9K&lu__=ZGfT0JTdUezt|luubQ#m|fog;I6PFFBK?!&$>B zY!2LZ!W%;2t`lMme46*nI}l&0BZ37*iXsbQtf(lcJpp#%A}$#p1;0J!&Lo6x&5;9f zWpoVvLI*q?gM1;snbS#5)?CpRJA>_`bm4z0VWaquECV&r!!SIE42fW!0}djhVW-(y 
zmq+Ii&N4rRYsMiUuxCaEOr%GC;f#eQXXoA1gEL@$l2kIn+X~bNE)c6TT4&GD6r1Z%sf4adGCbA59bZ8-@ri;IuA&|H{6WX*sMdAEaW8EWN4l zYr`_l_-CjAc;Om>G8)1~lZa9t6|HZRiy6ts@gE#RhTDFC`v@d)&+V(KSeZZq)msj4 zvH;j78rrjlVz&v^fz<%Yq!dKD&MMv^k`E@dFhkyZ5f=!03i(-pk_OTc7ehhjxTpkj zvY)~jR>6e{)O#@Qk&`kfG(^`s2TNw8IyMm1m@q{7fNF_T&?X$QR#3!qp@0WT$w72} zPGiJxB2zjffvzi@SOE=L$^~i6QB4y8;53{HwV#0i_&1fv@^9i~tHpvk07Y0|G>ndl zy4MC;Sy5L)Qw)qtXp?MDc{r_kI8CP1;zEI^OaM72Jyzv5g!iRcZlnHlt@H-(WHg-- zJo^x*81&{J;RNnof`es65J#6KoKQI0v;&D0dH_;r!*ROKePSDrg)w?XyL{R8Zj(5p zS47TT#2TGe_KYx!;CvOoVD1M3^ufp(+u5N4#wBmME=UmmXs+p;87_`F5oS%jxFk^b~3pN-v8Zgr}9N+3XOt-s+qlI0^ z3GHrZxRI?Ix}ipjgtWYZEX4_g|sSXVd)qfbI zMWP&D5z4_;EK6~lrAQ>op;m9J2(nRG^nz^Ik5kMj>toVq9`>N^|1E;i&yJ~Tx~kcl zW&8ybYg3jVz5Chuf4_dnF2Dc%j(CY05<&*kUqHAOy7N*W{w7`-d#c%8&wh~+^_Ps7 zkZzcbyJ0H3ti>#oQTE-Bvd>>y?-yk^3{TT~rYXbhGR!W+?CXZv|9`0|sxD*gGS)6* z?K0N>Y)+_$#@c15{iTH3@8unR^pJ!M#6Q{%PzK^aFUG;}qSoE*-Ws&$sRR_H-3eST9EqE_{heto8Ia?BWH!wCMifMmqFEUuJZ~H zP|PG`iWy+yMviKaGu*=MqyCz`n1uo>fh%5*EC$x6%W$oQ#+dgm^M< zhWwM%T1u@iCm%k4Jp5VJemeT}=`;NB)2H+Ov-8va_xLk>cKq{{jz66roqss|xh=C# zdU+n=9Ch}cLo0T|; zhLmW?XO4zc71hwJj;tb7hmCQYX@l=d;wDH9TPD@-GdI#@d8cji^$Edcg2l3m(}p;? 
zlPJ&ioXV2rD_JhzmGnYDhb=y!`&4mKJ;U@Ar>Co~tl}iAI7zsigv&{|oP^8C0!}aP zaGFd>zAQ;X`!*k_>)a=}(F6j@``s4rcd|y4tkJZakfSF;qwNsf=%to#;u)zzNfbO5 zbFzQb*Ln(8UAhQ^{xXP*qINaznoumy)O)7pw$^l-Vq7qwg#B|!OWdO~$4m>VMdh`k zG!MErjkQZURU`DPzP-?hY-UlTszsrxhPw@1xfS#P=Q!AA6F|Su zn5k2$t6`u8zHjPIpeljpTV_N}M{9cg-kt3KN4N5g74z7KM&wWTL$nMuU)QZDRG4qL zOjE-!a3U}nf-PVlc1hJp+(Y9~ z_Wgom5r{-Ty>#@Ge21NcKuHMH%YZ<=l4I)8;7``Z@lV`RDhAbsMKb)3cF8fKA{aRZ zDnwZlXZ)$el(7}Mm^fO892EsoJLGgk)UHML z@XGX7dLg#vh1eu6@0E~JV)7&=@8x3hq#k&%9vBUSg@vD%{ztfR9pJqbcH_~zP5SRo zKOQ)?WtnQ*yJ#c_@58}(5${m3aI>t_O~ue$&3)V=Zi>c}Zw*hkJXPr_rn$0+8-i)U z*dMYm5c1ng;{m=IRUlp~+-qYwhrS>rTbZxSV6h7#$1pcs*5`Zm=?>8;rgZ>WzsSSw2F#l<9A6a}syGS^iiO>s>pP~AvT zEnBf2O;=4l5+QjBYkd^}sY$F%9t$s=(T4%I_$8xXI-`$x1V-}uHSqesGs);z!RU7a zW>82@zjRKYR{l3u#P{L01h2m)@VcehaEI&2SI$q6?%RFo`~>N~_1DhNkFpEzotNnO zDsvpG4UX7o7}O)lMZe}_t%2p`Vg2C4T6)W$KL?-2TQ0LyWR{A|QhB;8l?}!-Ix!{xFGSWdHS&(lJMGUjW#VH>)lnu_L{hGqD^;y6@sWwy^2vVEk#M&|SE4k=|m z&zZ7fx4m&lSy66;IJxtBOO1BQP6#tjv39(!VaST6uNtn$N3ncqJj%Gfhvk*R3U9 z>%Hz=F8Yj_uHpDr*I~NdH5@JMI!r62X zg*u^2O-rR#U>K1RvPQ(B^=0Zn<#Z@9VvMM~z zeKRj4^y@W2zZ64P&H1{euM+z8D9A4f{Mvco*9LTVXR%aj{j6na|LzB34Ggv!Ov-Dw z)SmM7-q4+w`tUdL%GguQ?s_)S0*XxX@a2JrUOP`0JWi^!7DDLZI(rwc8qW#-gRj=M zd4*XGZkd0b9NR14KT@`97@w9acoZYV$8c3gwRN>W_0C<;G`yI2=Pt77qVmpN2=Cm* z%sY3%y>k~+@7x7?=Pst+Ir!-Uy>l1BJJ%b9(Hbx!!DtP*Ye`KHz6OPm&9<%%5QpQ zi4H)RX+NYnTY|!Yj!n$v(-qG&)3nk1}Wx2Pmi>jfdTKA#F&gKmJTin$a?WSZMxD_ZQYVh z5|AYUS38s=@DhZ}~v@=D5spiMuIWX0&p{&k*LO2pH^&q?y{L>@HA3l=3k1Rx4e=EHr z5+t=PkkqI1{j>Aa{r3_u^=Mum37C?AsV4_aiRWNkKMNUI2PO6RuES34WirptP2U3S z+Evif3i6^;sgl*kv}&I(*9yGBN8aZG_ZsKMW!BjW2LTu$RKhWq*mgc>fE_jy^97}ATfMGV;7vz>%~37C^O z;#Oe2R-?RrJ^8e-_+Mj|gxpQX4fB9;!{nR=1MzBM(qz(8%7LPjVFgLUISv?2#sgA_ zY2WLGsZuDOzfc&2UUFx%FrT?uje!%cDWr6~cIg16u_z8w8eXR~=pYU9*pQ+i6oaD5 zlR^qaO*eoo5h8Lq8)1S4MZ`f>AX?^{tetD3nC5HBH8CC2aBWp}RHw66uE{iOgw%6N zHlAuC5=Tfar(~m8)*K~12Rsg*(0Q$Te9htlUfD|nWmR)IOT*Vk&sz( z$qTL0yZP6%Cvm{iLOALdV4>VW8(%Sjf-3kcIjb)eegCF!^py6Tku2tISGc$5fhIpH zcU%>c5nlc>jqg*-z3SKV744#3%H6)0ezT&w&9Z` 
z;U1(jrC4r6Z%V=3kh&a#U>4HIl#22x`&&>vJ-gft8`{;`J9o?4C&0+ z3Cc=mhID4^G;yVk{s=l#iluaB?2uqeXNFXiPcM(p+?lbyf8xG5CmrdaxSNAw=3{E9 z`lZ-iO+Qx*Pc{YnP!(%t^x{zUYDg{7r8~#FY86Ce S(Trx%jQ<63B6;BU&H(_01)C@U diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/root/alterations/publication/publication_1.gz b/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/root/alterations/publication/publication_1.gz new file mode 100644 index 0000000000000000000000000000000000000000..89ed9671e145d630b3093b97cd8b5c86065e1fbc GIT binary patch literal 1488 zcmV;>1uyy^iwFo9Lv~{T18{X>Y-wX*bZKvHUoimfSZ#0HHW2^({z^ zC0!SHNneH)MLq@Y{zo!04;(o!>Vs0@5qPu^6-v#4wsdNKedy5 z`h4JIOtZ5pmrio$h-xkxkrv=2IV&1CaB3yzP_c>w<66kdNb!O-jmVtDX%>wrYpHb7 z%{HK&gKoZJC9dL}GBt-rLx$rGpFs&$F{?l)_iG<_+H0Am@*b(L2aZ;4gZLeKI6@^+ z<)9Squ|l0e0k~(nurq+VPJ$|cnhF}(Dh_&4L;o<8Qf~|WvC@L0HFjWc(?W1&09~z? z+zo%1AmXUaTA%KmKU!9ZT!89>$)woA@=8pFW(Ap~*T6M7%b?Po;tq5oyD|sdW6||2 z;+V6dH8UPP^gKE`x^DCzaOhJ%I2?7K^A)4Sgp3kttEK9~CPl9@&Y@|Pn9g(`{p9WV zd<*`hhKd|pg&lyqm99#e?ZMm=ux>11&MJ-33y;~6D_~vpZGyQU-3DT^;Y4V-vJ3A* z(wEqB#kb9Ni1Z#zQlM%&nEEw$tU|?9&1aT#6XIrAvzfBMGux$J z%oy{sETTi-I=Hp@(giJATDP^aix#B`;IeiSFOI{QQa_~rAUtqbt7kUI@0K}=dU;zE zt2AW!BFo6+7Ap4~ZadOy)hfz2hkF|MuUK`1+ZofWvj6`=zIF;#_auc>961T2Boozi z7jaxb156=2MOc-QlY2HI7$8qVfJpz;HFtm;2VU$(ejM2-ei|2XlNh(GxJe9c>xF`; zXtY}V;~w*#(q+%dRsA%5Et^@($c0Qn=?49>nWdYfJ^6K#`ahENStn`WhT$l1N1-2Y zd;S+>_iJ(r*BR?MeKoQvw)S`M!{5)^-#+>~9J(VKgg;36d6RO_RsGaFuC%O#B&V&? 
zER+58T#xmI*C`s>GO)s;%~}HU6zfd1i?P^N8%MWH@c#Vg^U25a9p_Xq7jS!py#y4y z&)S#3^Uh>ry<&DW!^kQJF7Mp2ZH_6eFZEwUJ zxGYNywnM3=M@#YMC|w+dhxDj`DJwX(Q)aDJ6=>15J{H+l=aHW9^M9^`HC4aNO&?h= zL+Ea2XY`;kdg8vw`TnliY~TG{$W~jAFH$9T@zOUp)T_+;ci%I9du_OEG(2;4$8sL^aHTUL6cvKc>IkGtYPC zBzjvG2CT{NrZ#IETF2ccRUs-HqER$-!zeVN0ak(<+n2zQ2+h~fdE=WH45OfU^jWNA7{r}a->|)jy>Cx_ft`;}oyZ0~|8)8n zxxf>Ub7+xQrK)FwleaTzu6a%_YjJ~hrk%x*JqLX&)eZS5^o%faiBkoT4`7Ig748X} qW4%T$TZQ%jcU%gsjrJSXV2ShM!^!3N%}X@e*S`TS+)w`P761UD_u|6< literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/root/entities/publication/publication_0.gz b/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/root/entities/publication/publication_0.gz new file mode 100644 index 0000000000000000000000000000000000000000..85706bc31f8d9816f211cb79173864c530081e30 GIT binary patch literal 10874 zcmXweQ*{_o!VQmb}#b@%G0-qlSU z0|T@5;bsB??&xN0Yh_~OYUN-La^?5#vc;A3eY4jGmRh+ZYyAWQq&hXP<7-U@uGTf> zWPI-KrodAO$H0nPOD)7(*nhp)fTYO%HJWNQmBlJeiegalbmhd0yH@1<^5mrcq+s-> z4>&Cl+|^$yHoaZq&HQHV^?$K|?}0y0Bq~;>${jm*(;N05+>5eQVfHjiv{<7rvFf*E zj%`xw#-V7YK1}3P-^OwZNIVu*6kuAvCG?f={=L|HcFj6dr~T8EG(ZJ{f7RlC%4jlt zS09&JGGL8n7^WJud8v?aY>YH$P^UlnS z2T@IZ53J+ReL@MM?`mO;NXyZAp(+-!NSET<6;#sTh!pKLON% zaicu=NIH%=%AtE$eew5RTjII%tS=(I@6;I|V+j)>OFYzA)#n8zI|(H}Gxo$sw4HTN zljGGt`wJ2w9X-MkE(RY%dVfD$zkV6rfi7eh5LMs)!3rWf56KNl7jY*XLK9O?D>6zb z0BMG!hAJ>n!YW8fy|qt6_I3({>WNil7s{7t>}G23-#WAba6)A|j=zyC8cobIA0du% z%S;JOH8lqIMsuBR1_xO5M<5eoo6U%PIKWe4;S@Tqg_y?z5(nLZhgMK{pKUEY-32C4-?WtgY zk}oiRqVVN^u!kteSAe>-U#K;cj#TzS;X#;n=mo`jjf#>&>ahs%lk?zE)C+~4^`jLX zR>8g-nB=NWEC3mFd@u6BjWm^K|E7^(XcA~UaYE*m&>!ZA*W={x$n-r~672@|+DLj$ zFD3@ydxQ2CL&{;73MB(QHWuDrn~$1;M>fLaw+w};Q0r$RBv|L=g8(9F&e49B@{B7l zc>{OxQ9b@#Q|D*>&{xm z5TZnU%KkOi<`WO3QN431S;OvRR0Q;nLm5o}=tDXHsAVm<(yHNpSRs5qwyl;pr6BbC z`@3Pkqb%9M#W8_~e?R0q8NS-vMW0M!W@EzhYjUj{EEay&DJQIq94l8d=2pu(=tu@$ZPCX- z`bH#8*bK)v(X^tvMzHmTi-PDL>LgkmFrv!30j`Vm$j5Wn7o1-Zf`SqJaX+0ga!{hh 
z^#W|SRZSKTHV`>!({!%j%cHU&ueFFV0ek!YLVDy1XNTKN0=P`hEMJ! z_&oLMR1_iLnU2yUpDY>C*Zj4-Vn~5W!!lwW*#32I&AF&wyO{pN7#8F-HWXHTN(b~1<6sw_wEZkNT)ojSE*ypgjq+J-6&})wV!yw zGLh0*^Qd&D=kSVU$D;H?tF#W1V&08)xh0W>!ML?9LqfFsMaULx-M4tkM1Na=Jcs2X z6e&7X4$&mS!N~=RM9+UQ-uEn2;9*6Ot_-vsmzj)fiKres)QzQDL)ruo_v4`ka$yX7 zG}yC@)K0~ae*R`LKa;oDxxbH(zjO1n3ODQFb{(&A8E5BHr8VU)4r`152)wE(Te!0; zL6f_2(hIWmsX9yt3S+34J!EgU;=Zzap!j>sx@pYoAJer7x|v%9E-3 zX6b`AL{zwi;`BnH!m`SU!4Q#O=I2o6iS9T!4=4;s>1*@;vOu1(%*wsl1N{ecGSQcT z=-~4^ec$0}AUxLN;$O^10cxIiY+@oUBpbdObbmm&7lOCpdcW!LLFf8DtRElu%e?)u z>2Joi&59lvV|x@#YkwR5K2_2rpOgw z2c?;ZQmXxW@pKpM(62yiSVg2muY`%pmgww8A24Ip9ZdXeUjzorhmj7DXxA2hJ~LSV zGN0C^+o#`)*qf(r8 zM}GcOehOR>LAM#^6s|-x7b7j|#S}NUdeDHSpZouG*egga3ZqsYH(o9+$pCvM91J}E5t(N!;J{=CNuq+4EqS| z)6$5yN)c>Qo-o#fN<=}5xIroqJE<*%ZiOdN<_dbn6q1B z+(fOu5m&zu1r%zp;Toc?%h$si8p#8z`GReQd>-InVe}VX*Vxtg; ze_YYiMW_(ifAZzl9PLUO`q(?h1}2eVuiwgTE!_t6Rbi+N)og_p!4p#PJs0iuPKMEN zQUO+IejSLsgKXJ#)o26r+$f)iSEeD!WQWJY+^L}op&Q(U>VAN!(+aT2m)QZr6G^G| z27_7sB;e!Od^wSf4Vv`Kb0hbRR$~+X#_rg09u8#37J~1)^3hWoc-GPt8r_>X1F+&`4H70?onJn zrk@@t5J%(lk3`Sh82T_-4a~+t&TyEIIqbBI@+OwvG;{B9UGrh+d%wn(Zk$fapBjW{ zPMPibvZYAL=48JCm&`W2lKY2oL0e--<0d|pX?A+TA#b5=iyRq2TZu*6MgpNsvhWZN zv$C?{>mTKK z{+dOS^!pIsdyRQU{i?swOXNXwhwbT?obO$K>_{W(*t@7_v*)McBc`veS>~maD~&$VcDB^JLv+1ak%xTx%-7TT%qkDl?E$VzY4g#);c{}$2M`Nw<9h~ zusRvC63EG*?WRzuk;c5BQzq-P zsjOwy2+r;ZtlXN$zlj~u-9B2WIs?qlX$VB-LWk^URrI_iX74@vG?w6@ImXr@&Q$u& z7*qI)9=AquDoGRM9YryzlS~J=R_ttNO6t*&|7vWXsd4+MXtLA;5T++Mz2^J6l2&?= z0DbR%or~woIKwnjlX~mh(K($>bIPjzJJ2dpX6R7Q2u&tXW&E<1nByj1MXY!#M#-86ZsZ21#ew{l;3!Ip^ySIy2ATdQBkFm7UzG4HgN392|+sK8xGNizrQ&@^5jQnX!R^a*ua-*aqjknBb^1E^O$@F5CwFz z-Bz36>xXcOacYKC0-#g(+vQKeE^@EE!# zk8%h3v1f_oIQS=C$k@B{Dq|709VFD)K4AAer$NniYVpV+!niXBBHoIv?<$2dph0+3 zaBVz1N8ww;#vRt?*)5t8)vn=>OK-uDHG>H7!V8tvQzXJXoRP!Pl$*##7%+*CE)!mWyrjRLMIc-FVn z$zg26_Bgw0UWj+RR(5{lP9orPbm4rswsmm^62-pl_CAc*$f~avn-b}udFJmtNJ3Zm z5J2q`R$3gWOq<=>-gTI?yfEaQVgt@7zU+Ji1Qy;G)W$WmfxWDoT&~ 
z+L@B~3Zzaz7mK(ro6u)Wa9~gLW-18!hkkG1))Qnm-j7GD29Fa`%TZj*3Ca9b+D4DM zMpGXb4+~%*QP@M09m*1UX4yHRhdOheot|zU-7*&B(ns_i3`kGB)no|pR^p9U{6;YE zn!tD8X*-K6=tz5d0OaSG^W*s%y`*hQ4eBUlgpBk!lsG5@r9G{J7c4l@KOue{7*|O+0#mdL%tzobWHlwQ zCRRL|U~-@WJ$68hK)YY45(fc@C2pp}Ps`O^seh|4U^amx$huFRQmO4Ujw?_2jHhAb zvfqrpO9p3=lQ`Bt<~3Ic`dY$6rlnF$-AHnRiAmWa+0Q5tMMvd*utx-EffivepBQp9 zd4{qX1|`N7{s8I6F&08F(^1IafRKi4KlBY_90Giy2WOu@F!Dsf#Tel80TUA&(r9XE zjhuDjn)-uc4+sqG&&0)GPDFZ>X2vq$?y}6g&INr)E>1$ds}=LRjdG?)#5o!UCJ=5| z#kfHDgC%7X-sI!!r~w&3X@rH77L+&N@+BX1N$~2Py+uC@D;MbAsBkdJVMOB6vhj?y z7&g4}f)a0#erv%%;yJEIdEy5lp&im23TFO`xUuBTflHkqpLFj!rDlB`eu>ef7UyGb#R$h1`?Pw=|W&YAP90xYu+_^y{0e7#^$SkOS3q5hZ zw`qgeCNph%FioydsYBO6DN&VBEXr%##go4p3aOG~)L=Ue2rCw>!?za)u?>s=O5$du z4I-#1V-DAbF#>xcLf}0so<>FNxX=%zWL2}nCs+Y6$KQjlpq1szLm1cO6U>c3@=3{n z7CyM(xqMJr$iP$1yN03D?Te?u3FE=3ElGhuAdZP#S&)Ey*- zd7nmivcfkzE9%1{;`1>U#5MA-oHXyBu;DEtV8ij5p^bA}H_*0`_At#j<3HuulyHiX zhulT(Zy06Y=eGePk@qJkxwJGOoLtohkw$#*Ir9@m=7iuFxE0pCWo(a1m?dGN1#q}~pc9A(g}PboCC3t_hnk%4T!nnL4 z=cCW9K3H<&tMIG9YTE4%^-yv&*;$_`Lo~E9Vs;!s%;li{510_}n3$K?3HGdc<=VEw zi^A;2V6WUQ+keX=?2w>pB&IQ}6B%3){XFdEDIW2NG#3jMp`${osC@-&v|?!u7ZeD8 zoC@n;Od7=~Au+bnUvkL)rcM#9+0{Tb4QV`s(C))8<14D3N)~vi2fz7}L`Cp-l`A|K zYhs@Q(kWKUYx~!~6g_+k5}9l6ecwrSiy^`m1fM&QY{SiXRUikT*TwQ1w_-Q_QWN`R zIHF9!H$S%rOfqH}rBzVBze#TVK(0}YX~YjNvm#tlGjhi?*ED(-AG-WjwFaEguGSh~ z{Rj9>qWqkzwIWvzl*?ZfH)k;s6lx@2BMO1ED0N_WRmj6k7is;Pv@{RP@Wft!jtivT ztMmbl(VjjG7>TCBr?g$y&rS7b1{V)xcX5?SZ`IV@Fi_L|K6ku)ouo4x+2dUkD{2 zj5LaE(lC|fIpgMw0l3O2;%N!&(KiXr_7(A-Ks04_LZOivPG0+K!)pB2EuAyC1bq_t zy1iWy!6h*+j2|~^I6Zf@?#*`HUE`EQ5%ga?e|qDF=;b#!ITj?J+IPjTM#O7xK}KRl z!u`SCt=2^(^J!Qg;osO6561gbok z{j01uJL}H#6kptLwu6$lzr0YOo=O$*nKZ7p3O~Jz4$c+N_UoW;G9}cXI-7pJkTMO> zTF$g5dZx`#$udk=S&+^mCiIz#;-baiqS@f?{o07TcZm1HSGhIZdVNWWhMHRFzi_13 zVG|1T6GoU@=Bxg+Yld9|#gu8an0oHX>Lx}uLbhoBwRhT8tn!^M^d?*I#dI}GVykE2 zhipC4J0B>UR)$NMqXnO@fl-G+ZzgLuSNwAT#U?WJ>FKZi2;x~L_gO{-4}rV9gQg?mG1rfS!~H|Y?=kHRzsacw z0;V}WzLnM`STSpFqlNv6+)920OiGL4MP&vH{sIq7|L 
z(n;<)11R&t@vG4o%#x*k^2DpW=%2T#$oXa5JZ`;R9aWebVRX3}t* zZ68AV7c>KLR47V0HyWP3s_dC+aive81U+bYc5(X>4lTaP<5HT|Zy_S---hhIWEhe$mx)6+sND9Us)5BXk8TJ6QE zE23@ITkvt?=B39jOn`&V+MD(I8Y_RX7HT%>_TBzeXkp~~gb+Ipu*dh<44x7mSa610 zKUEK~VDOo?>B!9Z)kok}&gF+i*L@sj@_K~CAH^3@dtA%jT3zMH42O0(aw8lrQ~<&N zTs`tdu{FkZK|@89C7+-S{g22ZuAbIx0abec3wkWZaG{!G&bhO16)yHL)h_HZb1lY( zEUp&w&nBk@el?~EPJK9etV0{Mz2ufhrJCv8$F*yQI?bx&h`%Mz#m61XZ{9~bUgZYT z4jYiq)=&5I?SNfX_{$Z1EVui4{m*VI5nX)MO8b^J9GOd_hxT>9+jgLh%>?A-vP6}V zV)V`uk&_&jFdxI*_kv1)86{ZWi6;~>q;KUWb3Q0CKH-d`288cs>r=}SHdss`#@l&K zQQk74I~-UEiIM|2SPSLWa?C(pb;(g)Xo=cmC3uW6ko+piBtF{k45J*yOgcNN3{G|Q z+Uq6?&^Q(`p{p1L&uL_0EzajHK26K;;sQk#zbXPP9wY^ygM?zGd&V!w;-_Q~nd(KF z7B#hnra~*1JNTzZUoLkMd~Ovy#Gje{ZfQ|47=4XVFn!RPZ(XoE^MF_QHEmJrJ{KkEaM(3_{}08{hMDFT?mzgV|eg5CLxFylwV;Y4UqC z8c4IPvHxTTR;QbZXwjb5>Fgis`s=FXfxP&AvMwj@(v0vrNAv8J)+JjLpP*h?uv{`J z#(ri@2Yjk(=4~UZB=5Q{G{FVaZuPLYr_3nxo_?hY02cNmzuyFeWrb)EBm^j zZvIIrXpP}JPR))u7v5)Gl_3=uB4NKi1YHFz792@_PL35gC%~PjK#~F)xXZVq> zUBtQjmSc1M6<5Iy5+Y|~Q~^H4paThb$S8Le3#AD0h50^^XQ(u&0=;v&8KwnnC^9yY zoeYbWN%i#jcqERzkbliAQ?y)xc_E`hSHPs!$_ToP%bC1t@Q9E7K}5K(74|I*F$a+B zCfkPg_Cz+$1o@%kv%iMs>S5EVX6;vJX7zH1Z;Xi0N+37cs%u^6)x*hlS|acv-_7zI zxnVF(l_~O@hNzeScE0t&b@E6oI}Al+Zf)bjae18rh}@^Mg@tL(&Smyx*KD^3{;k}o zKLniS3YPpC0mNZ+imXY#Ug!0YUb=PulhZX_I5o-ZLDS=s?y=h;dyvyb;B(5@;c@MJ znG=%tKceD&$`it@10q3zTLYSIhnz*2MIT6}Qmvsx3W=%mf1%)1IBa>8#mA%|WMJI% zBL2n6?G@-fAeM^`ZpAh! 
zoL!K|*BN+h`7K%a>FDVH1?j*4P4vxGx`T`jjUDkWC_uPaOwGR)!-m|}eD$cZPXVdm z?XzFJ7PuP-*7om0L)alVC3>X7;CD^JLDGJ;E2HlKN0Hb>w3Hwglgp}smP`(HyTD2nvF;%)UQKT@xdHwF z`keW3-@@3tD6~noHaR=e2(4zFS}mnYG>gk}&kT zG6K*?-sGmHs42@+T}ZkuQ>Gkq@`B*&Ew%Y2KI2n4GO5)Z;p*?cy&5Q2g83N3yf6E| zZg98C;cV7tO`p~>po62ZR}GuN4cLLX1MzuvrqwGFuTuGV?vXc^zirNO-mzK$MAq>q zOG-b292!pCRJwfYrnM<(Sn8xBpyIC$XI9y!Rw4<3IPu(>=v<&BwZoM=;_OSHJ)dDz z$(+7J_r&_Qi1KI=62Gsr%>U@d+u^2(&&=P7O0v2RImtzFmk;c5p#BSn5ldXL2i*c7 z1>qsbs*E;C`rgVMaiCeAt!eX2ur5iu+x%K}@5%g1?+Dq?=}BhzgD z;J+P|QwrP_W{LSu-V;fx3SwxmowAHxMe5$^0sKB_(DV$&MgDMDv;@s}?8(&SB6<_r z|Nl?O;xyK@!SjXa<(XpsPr@XjW|(PldY{};*|Kxp=F^8KGhwW?fbNvv;m=l4N__Fe zV|k&DjRv;M%eI{#m(99TLO;h!!8iYa3Mh0 z=1l#bZP@J&Eow)bKZQM`_e~<(ZgMR}n<_bzrv%8Vr2C;vi{NI+DEF4~-HXmc6(ySw z%H{?99!NGKo14i#9!l)XF)N|BcT1&ga>8>UsYt8au5ovP5Z>y`Iq%<3orCa9aSN{A zBaAZ}l|iYfo6Kl9^poIbO<^@*psvcs{a3FJt8=`;pUF|C{VODbE3$F_?bgd@-_B~{ z^gXQVqM$P=e8&Ynl9OO1vBG5A#{LP0PxMm~bL;}MZ`k7pylh;@{mrlQ`dcx^^F5Cb zkrdw77yB#G*TMZ88+})4q$H*sE}#zsYfGl|lhBHZ3g>o3p43S<=*V*UYnY*J?rwhdW(O4~uo&iyr`kd+Vr3ai8;ZmO!YWsqJn7){!S-RxJo3RU`3S&bbrXX z)6nlg*7Ch~yPOSFl%8R36S1&OHP=muykyU8aW5joqph(iw2v8D36%cOcgHekBi?Wp zz;Tj$aEriQh6NCcNtnK7mSnUZLm-}RS!6c9(1LNw_L>&ZzZ7QFo_O)Vme=45=UWYv z_Qu8a=PyTz*c$J$2?Qrw8uWRZBTpnc?3r>w4>O#aSbU{T7xXKYmxPsw;*6{D+3w9a!IyjiU z`4t6mN8JpLlyObUG{{>W@=KI zLqGMfl&Nt0JGU|8tK&Kp+wPTfs}k|g9X<@RnfD=YM0w2osVeW+AT-M$P3)?G8;lI zMfh!~%GCujP{j_|0V%HiI)tjX?xp-HO44GC2%-tnUZ<<;+#7yaTCC${{Nts_f&^d7 zzlYr(MKKTN&|U5ke=S-4b;C2Z;&kb3o;+oGeQAz*oI=|0@Ufdkn0A2n9Dzi03oMk|*qgrEPE`I3K|g60nS zl<1!BeJCy#>djJW&MM*ah#AAsB1FiZTpPk8XdgT1r?n9_W3a^&RbL&XQq?eHs1{7% zptKq2?5{2BA|fS}B%hnsDF<_+)iw$;h0NM$wR#Ulje84nxWIbSx6YElc*Sb4Vr6D$ zjphE~aFC>xHmR$`MU~o?FD87GWb#&FUzsk?5lTlJ5cwEuH?!84_| z3Bfe5ZL4^}C(P<^|C28T$y_TA>PDlRcp)&f!;JNRMPNi@<|1nN7Z2mVws~>lpA%-0-S=A5M23vpm;jvz zTN>pyRBmKTl)V*UJgWeCg2Ibp)KZ=#+2*_vZcd%WpCsb7PQ6@J)No_f@p8ESPkZAJ zJU?E9$^Xtmg@svHY(rDR(Y^>f=~IKGzULPU9Shk0yiva%dcHUKM{$SLuR|vNJUh0b zw-(to9V)gX#`ZRD2fqr_WRll`c9JF1-Rfyf2c;3|Yd>s`I`_(Bqc@x~$IR4(@H^X0 
z2JcRfMS*(eMUXXr`^P0?duL$klA1g%4RH<=Pup=?LjhXFE+<-|HN-)|z`- zj9Hmub!ufLFU;r*5zz8(I7&#`I^!6xx_jEz^p}5h3a9p-R?T2!>R6o%uwJ{Q;)R(o zTjfburz~rxY5Q?f)JV5WJC&EGe`l>@W|#enpK9`x*ZE;xmo6VsgwI%rm9SccUM+gW z-BGGUtJ^SoWH3lP)wn=~el;_ic0{B^%N8O`J$eLhvQef%i3ZCdjrc#cnrr0n8w4Yn L#g!=;4CMa+cETas literal 0 HcmV?d00001 From 2687fc9f73733d2b8486f44bbdba6b8130359551 Mon Sep 17 00:00:00 2001 From: Alessia Bardi Date: Tue, 22 Nov 2022 17:30:56 +0100 Subject: [PATCH 24/55] tests for EOSC Future review - ROhub --- .../dnetlib/dhp/oa/graph/raw/MappersTest.java | 11 ++ .../oa/graph/raw/photic-zone-transformed.xml | 108 +++++++++++++++++ .../dhp/oa/provision/EOSCFuture_Test.java | 88 ++++++++++++++ .../eosc-future/photic-zone-transformed.xml | 98 +++++++++++++++ .../oa/provision/eosc-future/photic-zone.json | 1 + .../eu/dnetlib/dhp/oa/provision/fields.xml | 112 ++++++++---------- 6 files changed, 357 insertions(+), 61 deletions(-) create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/photic-zone-transformed.xml create mode 100644 dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/EOSCFuture_Test.java create mode 100644 dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/photic-zone-transformed.xml create mode 100644 dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/photic-zone.json diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java index f0eadbd0d..184383f92 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java @@ -995,6 +995,17 @@ class MappersTest { } + @Test + void testEOSCFuture_ROHub() throws IOException { + final String xml = 
IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("photic-zone-transformed.xml"))); + final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); + final OtherResearchProduct rocrate = (OtherResearchProduct) list.get(0); + assertNotNull(rocrate.getEoscifguidelines()); + System.out.println("***************"); + System.out.println(new ObjectMapper().writeValueAsString(rocrate)); + System.out.println("***************"); + } + @Test void testNotWellFormed() throws IOException { final String xml = IOUtils diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/photic-zone-transformed.xml b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/photic-zone-transformed.xml new file mode 100644 index 000000000..22bf0577e --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/photic-zone-transformed.xml @@ -0,0 +1,108 @@ + + +
+ fsh_____4119::68126da991bd76d8be494bddfbf7a1bb + https://w3id.org/ro-id/28499bdf-a0c6-46aa-a96f-50bd9490b8be + + + + + + 2022-11-15T12:29:19Z + 2022-11-15T12:29:19Z + fsh_____4119 + https://w3id.org/ro-id/28499bdf-a0c6-46aa-a96f-50bd9490b8be + 2022-11-15T12:29:19Z + rohub_data + ro-crate_data +
+ + + https://w3id.org/ro-id/28499bdf-a0c6-46aa-a96f-50bd9490b8be + + https://w3id.org/ro-id/28499bdf-a0c6-46aa-a96f-50bd9490b8be + + + https://w3id.org/ro-id/28499bdf-a0c6-46aa-a96f-50bd9490b8be/resources/b1b617b2-6b79-4bae-9fa6-b76945645626 + https://w3id.org/ro-id/28499bdf-a0c6-46aa-a96f-50bd9490b8be/resources/78103994-30be-4875-bf89-5acd752b5c3d + https://w3id.org/ro-id/28499bdf-a0c6-46aa-a96f-50bd9490b8be/resources/18fd1c70-249b-4c67-80ee-539f801a0da7 + https://w3id.org/ro-id/28499bdf-a0c6-46aa-a96f-50bd9490b8be/resources/32faa2eb-4cc8-401f-ac5c-bec2849b70e1 + https://w3id.org/ro-id/28499bdf-a0c6-46aa-a96f-50bd9490b8be/resources/4c253f5a-d427-40c2-9e9f-6063ae087239 + https://w3id.org/ro-id/28499bdf-a0c6-46aa-a96f-50bd9490b8be/resources/371b1957-078c-472b-a195-af7bce152c10 + https://w3id.org/ro-id/28499bdf-a0c6-46aa-a96f-50bd9490b8be/resources/82f9e4b8-01b4-4e50-9e27-ec9d337c8d74 + + RO-crate + + Creative Commons Attribution 4.0 International + open access + + + Mapping the photic zone of the Mediterranean Sea + + + Estimating the penetration of light along the water column from satellite data to map the photic zone in the Mediterranean Sea + + CNR-ISMAR + + + Giorgio Castellan + + + Lorenzo Angeletti + + + Paolo Montagna + + + Marco Taviani + + + + 2022-11-14T16:32:45Z + + + Estimating the penetration of light along the water column from satellite data to map the photic zone in the Mediterranean Sea + + 2022 + + open access + + + 813.478 KB + + + Earth sciences + Ecology + Optics + + + https://w3id.org/ro-id/28499bdf-a0c6-46aa-a96f-50bd9490b8be + 0048 + 2022-11-14 + OPEN + https://creativecommons.org/licenses/by/4.0/legalcode + + + + + + + +
\ No newline at end of file diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/EOSCFuture_Test.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/EOSCFuture_Test.java new file mode 100644 index 000000000..08bf19fe4 --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/EOSCFuture_Test.java @@ -0,0 +1,88 @@ +package eu.dnetlib.dhp.oa.provision; + +import com.fasterxml.jackson.databind.DeserializationFeature; +import com.fasterxml.jackson.databind.ObjectMapper; +import eu.dnetlib.dhp.oa.provision.model.JoinedEntity; +import eu.dnetlib.dhp.oa.provision.utils.ContextMapper; +import eu.dnetlib.dhp.oa.provision.utils.StreamingInputDocumentFactory; +import eu.dnetlib.dhp.oa.provision.utils.XmlRecordFactory; +import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct; +import eu.dnetlib.dhp.utils.saxon.SaxonTransformerFactory; +import org.apache.commons.io.IOUtils; +import org.apache.solr.client.solrj.util.ClientUtils; +import org.apache.solr.common.SolrInputDocument; +import org.dom4j.Document; +import org.dom4j.DocumentException; +import org.dom4j.io.SAXReader; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import javax.xml.transform.Transformer; +import javax.xml.transform.TransformerException; +import java.io.IOException; +import java.io.StringReader; + +import static org.junit.jupiter.api.Assertions.assertNotNull; + +public class EOSCFuture_Test { + + public static ObjectMapper OBJECT_MAPPER = new ObjectMapper() + .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); + + public static final String VERSION = "2021-04-15T10:05:53Z"; + public static final String DSID = "b9ee796a-c49f-4473-a708-e7d67b84c16d_SW5kZXhEU1Jlc291cmNlcy9JbmRleERTUmVzb3VyY2VUeXBl"; + + private ContextMapper contextMapper; + + @BeforeEach + public void setUp() { + contextMapper = new ContextMapper(); + 
} + + + @Test + public void testEOSC_ROHub() throws IOException, DocumentException, TransformerException { + + final ContextMapper contextMapper = new ContextMapper(); + + final XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, + XmlConverterJob.schemaLocation); + + final OtherResearchProduct p = OBJECT_MAPPER + .readValue(IOUtils.toString(getClass().getResourceAsStream("eosc-future/photic-zone.json")), OtherResearchProduct.class); + + final String xml = xmlRecordFactory.build(new JoinedEntity<>(p)); + + assertNotNull(xml); + + final Document doc = new SAXReader().read(new StringReader(xml)); + + assertNotNull(doc); + System.out.println(doc.asXML()); + + + testRecordTransformation(xml); + } + + + private void testRecordTransformation(final String record) throws IOException, TransformerException { + final String fields = IOUtils.toString(getClass().getResourceAsStream("fields.xml")); + final String xslt = IOUtils.toString(getClass().getResourceAsStream("layoutToRecordTransformer.xsl")); + + final String transformer = XmlIndexingJob.getLayoutTransformer("DMF", fields, xslt); + + final Transformer tr = SaxonTransformerFactory.newInstance(transformer); + + final String indexRecordXML = XmlIndexingJob.toIndexRecord(tr, record); + + final SolrInputDocument solrDoc = new StreamingInputDocumentFactory(VERSION, DSID) + .parseDocument(indexRecordXML); + + final String xmlDoc = ClientUtils.toXML(solrDoc); + + Assertions.assertNotNull(xmlDoc); + System.out.println(xmlDoc); + } + +} diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/photic-zone-transformed.xml b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/photic-zone-transformed.xml new file mode 100644 index 000000000..79830b0f7 --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/photic-zone-transformed.xml @@ -0,0 +1,98 @@ + + +
+ https://w3id.org/ro-id/28499bdf-a0c6-46aa-a96f-50bd9490b8be + 2022-11-15T12:29:19Z + rohub_data + ro-crate_data +
+ + + https://w3id.org/ro-id/28499bdf-a0c6-46aa-a96f-50bd9490b8be + + https://w3id.org/ro-id/28499bdf-a0c6-46aa-a96f-50bd9490b8be + + + https://w3id.org/ro-id/28499bdf-a0c6-46aa-a96f-50bd9490b8be/resources/b1b617b2-6b79-4bae-9fa6-b76945645626 + https://w3id.org/ro-id/28499bdf-a0c6-46aa-a96f-50bd9490b8be/resources/78103994-30be-4875-bf89-5acd752b5c3d + https://w3id.org/ro-id/28499bdf-a0c6-46aa-a96f-50bd9490b8be/resources/18fd1c70-249b-4c67-80ee-539f801a0da7 + https://w3id.org/ro-id/28499bdf-a0c6-46aa-a96f-50bd9490b8be/resources/32faa2eb-4cc8-401f-ac5c-bec2849b70e1 + https://w3id.org/ro-id/28499bdf-a0c6-46aa-a96f-50bd9490b8be/resources/4c253f5a-d427-40c2-9e9f-6063ae087239 + https://w3id.org/ro-id/28499bdf-a0c6-46aa-a96f-50bd9490b8be/resources/371b1957-078c-472b-a195-af7bce152c10 + https://w3id.org/ro-id/28499bdf-a0c6-46aa-a96f-50bd9490b8be/resources/82f9e4b8-01b4-4e50-9e27-ec9d337c8d74 + + RO-crate + + Creative Commons Attribution 4.0 International + open access + + + Mapping the photic zone of the Mediterranean Sea + + + Estimating the penetration of light along the water column from satellite data to map the photic zone in the Mediterranean Sea + + CNR-ISMAR + + + Giorgio Castellan + + + Lorenzo Angeletti + + + Paolo Montagna + + + Marco Taviani + + + + 2022-11-14T16:32:45Z + + + Estimating the penetration of light along the water column from satellite data to map the photic zone in the Mediterranean Sea + + 2022 + + open access + + + 813.478 KB + + + Earth sciences + Ecology + Optics + + + https://w3id.org/ro-id/28499bdf-a0c6-46aa-a96f-50bd9490b8be + 0048 + 2022-11-14 + OPEN + https://creativecommons.org/licenses/by/4.0/legalcode + + + + + + + +
\ No newline at end of file diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/photic-zone.json b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/photic-zone.json new file mode 100644 index 000000000..ffef2740a --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/photic-zone.json @@ -0,0 +1 @@ +{"collectedfrom":[{"key":"10|fairsharing_::1b69ebedb522700034547abc5652ffac","value":"ROHub","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1669132109711,"id":"50|w3id________::68126da991bd76d8be494bddfbf7a1bb","originalId":["50|fsh_____4119::68126da991bd76d8be494bddfbf7a1bb","https://w3id.org/ro-id/28499bdf-a0c6-46aa-a96f-50bd9490b8be"],"pid":[{"value":"https://w3id.org/ro-id/28499bdf-a0c6-46aa-a96f-50bd9490b8be","qualifier":{"classid":"w3id","classname":"w3id.org","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"dateofcollection":"2022-11-15T12:29:19Z","dateoftransformation":"2022-11-15T12:29:19Z","extraInfo":[],"oaiprovenance":null,"processingchargeamount":null,"processingchargecurrency":null,"measures":null,"author":[{"fullname":"Giorgio Castellan","name":"","surname":"","rank":1,"pid":[],"affiliation":[]},{"fullname":"Lorenzo Angeletti","name":"","surname":"","rank":2,"pid":[],"affiliation":[]},{"fullname":"Paolo 
Montagna","name":"","surname":"","rank":3,"pid":[],"affiliation":[]},{"fullname":"Marco Taviani","name":"","surname":"","rank":4,"pid":[],"affiliation":[]}],"resulttype":{"classid":"other","classname":"other","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:languages","schemename":"dnet:languages"},"country":[],"subject":[{"value":"Earth sciences","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Ecology","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Optics","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"title":[{"value":"Mapping the photic zone of the Mediterranean Sea","qualifier":{"classid":"main title","classname":"main 
title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"relevantdate":[{"value":"2022-11-14T16:32:45Z","qualifier":{"classid":"Issued","classname":"Issued","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"description":[{"value":"Estimating the penetration of light along the water column from satellite data to map the photic zone in the Mediterranean Sea","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"dateofacceptance":{"value":"2022-11-14","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"publisher":{"value":"CNR-ISMAR","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"embargoenddate":null,"sou
rce":[],"fulltext":[],"format":[],"contributor":[],"resourcetype":{"classid":"RO-crate","classname":"RO-crate","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"coverage":[],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"context":[],"externalReference":[],"instance":[{"license":{"value":"https://creativecommons.org/licenses/by/4.0/legalcode","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0048","classname":"Research Object","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"10|fairsharing_::1b69ebedb522700034547abc5652ffac","value":"ROHub","dataInfo":null},"url":["https://w3id.org/ro-id/28499bdf-a0c6-46aa-a96f-50bd9490b8be"],"distributionlocation":null,"collectedfrom":{"key":"10|fairsharing_::1b69ebedb522700034547abc5652ffac","value":"ROHub","dataInfo":null},"pid":[{"value":"https://w3id.org/ro-id/28499bdf-a0c6-46aa-a96f-50bd9490b8be","qualifier":{"classid":"w3id","classname":"w3id.org","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"alternateIdentifier":[],"dateofacceptance":{"value":"2022-11-14","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","infe
renceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"processingchargeamount":null,"processingchargecurrency":null,"refereed":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"measures":null}],"eoscifguidelines":null,"contactperson":[],"contactgroup":[],"tool":[]} diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/fields.xml b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/fields.xml index 910a366f6..be2ee7b98 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/fields.xml +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/fields.xml @@ -2,11 +2,11 @@ - - - - - + + + + + @@ -14,17 +14,16 @@ - - - - + + + - - - - - - + + + + + + @@ -34,18 +33,17 @@ - - - + + - - - + + + - + - + @@ -54,35 +52,36 @@ - + - + - + - - + + - + - - + + + - - - + + + @@ -94,26 +93,29 @@ - + - - + + + + + - - + + - + - + - - + + - + @@ -132,13 +134,15 @@ + - + + - + @@ -156,20 +160,6 @@ - - - - - - - - - - - - - - - + \ No newline at end of file From 2832117f232b682e1b20e8bd7854b95bee306a36 Mon Sep 17 00:00:00 2001 From: Alessia Bardi Date: Tue, 22 Nov 2022 18:01:12 +0100 Subject: [PATCH 25/55] added eoscifguidelines in test --- .../eosc-future/photic-zone-transformed.xml | 98 ------------------- .../oa/provision/eosc-future/photic-zone.json | 2 +- 2 files changed, 1 insertion(+), 99 deletions(-) delete mode 100644 dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/photic-zone-transformed.xml diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/photic-zone-transformed.xml 
b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/photic-zone-transformed.xml deleted file mode 100644 index 79830b0f7..000000000 --- a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/photic-zone-transformed.xml +++ /dev/null @@ -1,98 +0,0 @@ - - -
- https://w3id.org/ro-id/28499bdf-a0c6-46aa-a96f-50bd9490b8be - 2022-11-15T12:29:19Z - rohub_data - ro-crate_data -
- - - https://w3id.org/ro-id/28499bdf-a0c6-46aa-a96f-50bd9490b8be - - https://w3id.org/ro-id/28499bdf-a0c6-46aa-a96f-50bd9490b8be - - - https://w3id.org/ro-id/28499bdf-a0c6-46aa-a96f-50bd9490b8be/resources/b1b617b2-6b79-4bae-9fa6-b76945645626 - https://w3id.org/ro-id/28499bdf-a0c6-46aa-a96f-50bd9490b8be/resources/78103994-30be-4875-bf89-5acd752b5c3d - https://w3id.org/ro-id/28499bdf-a0c6-46aa-a96f-50bd9490b8be/resources/18fd1c70-249b-4c67-80ee-539f801a0da7 - https://w3id.org/ro-id/28499bdf-a0c6-46aa-a96f-50bd9490b8be/resources/32faa2eb-4cc8-401f-ac5c-bec2849b70e1 - https://w3id.org/ro-id/28499bdf-a0c6-46aa-a96f-50bd9490b8be/resources/4c253f5a-d427-40c2-9e9f-6063ae087239 - https://w3id.org/ro-id/28499bdf-a0c6-46aa-a96f-50bd9490b8be/resources/371b1957-078c-472b-a195-af7bce152c10 - https://w3id.org/ro-id/28499bdf-a0c6-46aa-a96f-50bd9490b8be/resources/82f9e4b8-01b4-4e50-9e27-ec9d337c8d74 - - RO-crate - - Creative Commons Attribution 4.0 International - open access - - - Mapping the photic zone of the Mediterranean Sea - - - Estimating the penetration of light along the water column from satellite data to map the photic zone in the Mediterranean Sea - - CNR-ISMAR - - - Giorgio Castellan - - - Lorenzo Angeletti - - - Paolo Montagna - - - Marco Taviani - - - - 2022-11-14T16:32:45Z - - - Estimating the penetration of light along the water column from satellite data to map the photic zone in the Mediterranean Sea - - 2022 - - open access - - - 813.478 KB - - - Earth sciences - Ecology - Optics - - - https://w3id.org/ro-id/28499bdf-a0c6-46aa-a96f-50bd9490b8be - 0048 - 2022-11-14 - OPEN - https://creativecommons.org/licenses/by/4.0/legalcode - - - - - - - -
\ No newline at end of file diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/photic-zone.json b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/photic-zone.json index ffef2740a..9729c6051 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/photic-zone.json +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/photic-zone.json @@ -1 +1 @@ -{"collectedfrom":[{"key":"10|fairsharing_::1b69ebedb522700034547abc5652ffac","value":"ROHub","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1669132109711,"id":"50|w3id________::68126da991bd76d8be494bddfbf7a1bb","originalId":["50|fsh_____4119::68126da991bd76d8be494bddfbf7a1bb","https://w3id.org/ro-id/28499bdf-a0c6-46aa-a96f-50bd9490b8be"],"pid":[{"value":"https://w3id.org/ro-id/28499bdf-a0c6-46aa-a96f-50bd9490b8be","qualifier":{"classid":"w3id","classname":"w3id.org","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"dateofcollection":"2022-11-15T12:29:19Z","dateoftransformation":"2022-11-15T12:29:19Z","extraInfo":[],"oaiprovenance":null,"processingchargeamount":null,"processingchargecurrency":null,"measures":null,"author":[{"fullname":"Giorgio Castellan","name":"","surname":"","rank":1,"pid":[],"affiliation":[]},{"fullname":"Lorenzo 
Angeletti","name":"","surname":"","rank":2,"pid":[],"affiliation":[]},{"fullname":"Paolo Montagna","name":"","surname":"","rank":3,"pid":[],"affiliation":[]},{"fullname":"Marco Taviani","name":"","surname":"","rank":4,"pid":[],"affiliation":[]}],"resulttype":{"classid":"other","classname":"other","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:languages","schemename":"dnet:languages"},"country":[],"subject":[{"value":"Earth sciences","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Ecology","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Optics","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"title":[{"value":"Mapping the photic zone of the Mediterranean Sea","qualifier":{"classid":"main title","classname":"main 
title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"relevantdate":[{"value":"2022-11-14T16:32:45Z","qualifier":{"classid":"Issued","classname":"Issued","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"description":[{"value":"Estimating the penetration of light along the water column from satellite data to map the photic zone in the Mediterranean Sea","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"dateofacceptance":{"value":"2022-11-14","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"publisher":{"value":"CNR-ISMAR","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"embargoenddate":null,"sou
rce":[],"fulltext":[],"format":[],"contributor":[],"resourcetype":{"classid":"RO-crate","classname":"RO-crate","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"coverage":[],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"context":[],"externalReference":[],"instance":[{"license":{"value":"https://creativecommons.org/licenses/by/4.0/legalcode","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0048","classname":"Research Object","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"10|fairsharing_::1b69ebedb522700034547abc5652ffac","value":"ROHub","dataInfo":null},"url":["https://w3id.org/ro-id/28499bdf-a0c6-46aa-a96f-50bd9490b8be"],"distributionlocation":null,"collectedfrom":{"key":"10|fairsharing_::1b69ebedb522700034547abc5652ffac","value":"ROHub","dataInfo":null},"pid":[{"value":"https://w3id.org/ro-id/28499bdf-a0c6-46aa-a96f-50bd9490b8be","qualifier":{"classid":"w3id","classname":"w3id.org","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"alternateIdentifier":[],"dateofacceptance":{"value":"2022-11-14","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","infe
renceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"processingchargeamount":null,"processingchargecurrency":null,"refereed":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"measures":null}],"eoscifguidelines":null,"contactperson":[],"contactgroup":[],"tool":[]} +{"collectedfrom":[{"key":"10|fairsharing_::1b69ebedb522700034547abc5652ffac","value":"ROHub","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1669134693781,"id":"50|w3id________::68126da991bd76d8be494bddfbf7a1bb","originalId":["50|fsh_____4119::68126da991bd76d8be494bddfbf7a1bb","https://w3id.org/ro-id/28499bdf-a0c6-46aa-a96f-50bd9490b8be"],"pid":[{"value":"https://w3id.org/ro-id/28499bdf-a0c6-46aa-a96f-50bd9490b8be","qualifier":{"classid":"w3id","classname":"w3id.org","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"dateofcollection":"2022-11-15T12:29:19Z","dateoftransformation":"2022-11-15T12:29:19Z","extraInfo":[],"oaiprovenance":null,"processingchargeamount":null,"processingchargecurrency":null,"measures":null,"author":[{"fullname":"Giorgio Castellan","name":"","surname":"","rank":1,"pid":[],"affiliation":[]},{"fullname":"Lorenzo 
Angeletti","name":"","surname":"","rank":2,"pid":[],"affiliation":[]},{"fullname":"Paolo Montagna","name":"","surname":"","rank":3,"pid":[],"affiliation":[]},{"fullname":"Marco Taviani","name":"","surname":"","rank":4,"pid":[],"affiliation":[]}],"resulttype":{"classid":"other","classname":"other","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:languages","schemename":"dnet:languages"},"country":[],"subject":[{"value":"Earth sciences","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Ecology","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Optics","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"title":[{"value":"Mapping the photic zone of the Mediterranean Sea","qualifier":{"classid":"main title","classname":"main 
title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"relevantdate":[{"value":"2022-11-14T16:32:45Z","qualifier":{"classid":"Issued","classname":"Issued","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"description":[{"value":"Estimating the penetration of light along the water column from satellite data to map the photic zone in the Mediterranean Sea","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"dateofacceptance":{"value":"2022-11-14","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"publisher":{"value":"CNR-ISMAR","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"embargoenddate":null,"sou
rce":[],"fulltext":[],"format":[],"contributor":[],"resourcetype":{"classid":"RO-crate","classname":"RO-crate","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"coverage":[],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"context":[],"externalReference":[],"instance":[{"license":{"value":"https://creativecommons.org/licenses/by/4.0/legalcode","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0048","classname":"Research Object","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"10|fairsharing_::1b69ebedb522700034547abc5652ffac","value":"ROHub","dataInfo":null},"url":["https://w3id.org/ro-id/28499bdf-a0c6-46aa-a96f-50bd9490b8be"],"distributionlocation":null,"collectedfrom":{"key":"10|fairsharing_::1b69ebedb522700034547abc5652ffac","value":"ROHub","dataInfo":null},"pid":[{"value":"https://w3id.org/ro-id/28499bdf-a0c6-46aa-a96f-50bd9490b8be","qualifier":{"classid":"w3id","classname":"w3id.org","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"alternateIdentifier":[],"dateofacceptance":{"value":"2022-11-14","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","infe
renceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"processingchargeamount":null,"processingchargecurrency":null,"refereed":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"measures":null}],"eoscifguidelines":[{"code":"EOSC::Jupyter Notebook","label":"EOSC::Jupyter Notebook","url":"","semanticRelation":"compliesWith"},{"code":"EOSC::Data Cube","label":"EOSC::Data Cube","url":"","semanticRelation":"compliesWith"},{"code":"EOSC::RO-crate","label":"EOSC::RO-crate","url":"","semanticRelation":"compliesWith"}],"contactperson":[],"contactgroup":[],"tool":[]} \ No newline at end of file From a79c47522dbdd5e4af90fda3aaf52cb03de9a7f0 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 23 Nov 2022 10:17:49 +0100 Subject: [PATCH 26/55] updated ORCID datasource identifier --- .../eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java index f92040c24..ba7c7dd01 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java @@ -554,7 +554,7 @@ public class PublicationToOaf implements Serializable { private KeyValue createCollectedFrom() { KeyValue cf = new KeyValue(); cf.setValue(ModelConstants.ORCID.toUpperCase()); - cf.setKey("10|" + OPENAIRE_PREFIX + SEPARATOR + "806360c771262b4d6770e7cdf04b5c5a"); + cf.setKey("10|" + OPENAIRE_PREFIX + SEPARATOR + "cd0f74b5955dc87fd0605745c4b49ee8"); return cf; } From 0e3edc501897bfdac52bfeda6316181b9c4d5ce3 Mon 
Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 23 Nov 2022 11:26:36 +0100 Subject: [PATCH 27/55] [Bulk Tag] fixed issue in verb name --- .../criteria/ContainsVerbIgnoreCase.java | 2 +- .../bulktag/criteria/EqualVerbIgnoreCase.java | 2 +- .../criteria/NotContainsVerbIgnoreCase.java | 2 +- .../criteria/NotEqualVerbIgnoreCase.java | 2 +- .../communityconfiguration/tagging_conf.xml | 66 +++++++++---------- 5 files changed, 37 insertions(+), 37 deletions(-) diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/ContainsVerbIgnoreCase.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/ContainsVerbIgnoreCase.java index a4a6f5663..501eb51b9 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/ContainsVerbIgnoreCase.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/ContainsVerbIgnoreCase.java @@ -3,7 +3,7 @@ package eu.dnetlib.dhp.bulktag.criteria; import java.io.Serializable; -@VerbClass("contains_ignorecase") +@VerbClass("contains_caseinsensitive") public class ContainsVerbIgnoreCase implements Selection, Serializable { private String param; diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/EqualVerbIgnoreCase.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/EqualVerbIgnoreCase.java index c5f0ce070..1cd07755c 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/EqualVerbIgnoreCase.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/EqualVerbIgnoreCase.java @@ -3,7 +3,7 @@ package eu.dnetlib.dhp.bulktag.criteria; import java.io.Serializable; -@VerbClass("equals_ignorecase") +@VerbClass("equals_caseinsensitive") public class EqualVerbIgnoreCase implements Selection, Serializable { private String param; diff --git 
a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/NotContainsVerbIgnoreCase.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/NotContainsVerbIgnoreCase.java index b21be83f0..e12b65a27 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/NotContainsVerbIgnoreCase.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/NotContainsVerbIgnoreCase.java @@ -3,7 +3,7 @@ package eu.dnetlib.dhp.bulktag.criteria; import java.io.Serializable; -@VerbClass("not_contains_ignorecase") +@VerbClass("not_contains_caseinsensitive") public class NotContainsVerbIgnoreCase implements Selection, Serializable { private String param; diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/NotEqualVerbIgnoreCase.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/NotEqualVerbIgnoreCase.java index c6958a641..c1749621e 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/NotEqualVerbIgnoreCase.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/NotEqualVerbIgnoreCase.java @@ -3,7 +3,7 @@ package eu.dnetlib.dhp.bulktag.criteria; import java.io.Serializable; -@VerbClass("not_equals_ignorecase") +@VerbClass("not_equals_caseinsensitive") public class NotEqualVerbIgnoreCase implements Selection, Serializable { private String param; diff --git a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag/communityconfiguration/tagging_conf.xml b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag/communityconfiguration/tagging_conf.xml index 06c57511d..4e580edf5 100644 --- a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag/communityconfiguration/tagging_conf.xml +++ b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag/communityconfiguration/tagging_conf.xml @@ -1193,7 +1193,7 
@@ - {"criteria":[{"constraint":[{"verb":"equals_ignorecase","field":"subject","value":"ciencias de la comunicación"}, + {"criteria":[{"constraint":[{"verb":"equals_caseinsensitive","field":"subject","value":"ciencias de la comunicación"}, {"verb":"equals","field":"subject","value":"Miriam"}]}, {"constraint":[{"verb":"equals","field":"subject","value":"miriam"}]}]} @@ -1317,81 +1317,81 @@ opendoar____::358aee4cc897452c00244351e4d91f69 - {"criteria":[{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"COVID-19"}]}, - {"constraint":[{"verb":"contains_ignorecase","field":"title","value":"SARS-CoV-2"}]}, - {"constraint":[{"verb":"contains_ignorecase","field":"title","value":"2019-nCoV"}}]} + {"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]}, + {"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]}, + {"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCoV"}}]} re3data_____::7b0ad08687b2c960d5aeef06f811d5e6 - {"criteria":[{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"COVID-19"}]}, - {"constraint":[{"verb":"contains_ignorecase","field":"title","value":"SARS-CoV-2"}]}, - {"constraint":[{"verb":"contains_ignorecase","field":"title","value":"2019-nCoV"}]}]} + {"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]}, + {"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]}, + {"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCoV"}]}]} driver______::bee53aa31dc2cbb538c10c2b65fa5824 - {"criteria":[{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"COVID-19"}]}, - {"constraint":[{"verb":"contains_ignorecase","field":"title","value":"SARS-CoV-2"}]}, - {"constraint":[{"verb":"contains_ignorecase","field":"title","value":"2019-nCoV"}]}]} + 
{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]}, + {"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]}, + {"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCoV"}]}]} openaire____::437f4b072b1aa198adcbc35910ff3b98 - {"criteria":[{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"COVID-19"}]}, - {"constraint":[{"verb":"contains_ignorecase","field":"title","value":"SARS-CoV-2"}]}, - {"constraint":[{"verb":"contains_ignorecase","field":"title","value":"2019-nCoV"}]}]} + {"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]}, + {"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]}, + {"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCoV"}]}]} openaire____::081b82f96300b6a6e3d282bad31cb6e2 - {"criteria":[{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"COVID-19"}]}, - {"constraint":[{"verb":"contains_ignorecase","field":"title","value":"SARS-CoV-2"}]}, - {"constraint":[{"verb":"contains_ignorecase","field":"title","value":"2019-nCoV"}]}]} + {"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]}, + {"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]}, + {"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCoV"}]}]} openaire____::9e3be59865b2c1c335d32dae2fe7b254 - {"criteria":[{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"COVID-19"}]}, - {"constraint":[{"verb":"contains_ignorecase","field":"title","value":"SARS-CoV-2"}]}, - {"constraint":[{"verb":"contains_ignorecase","field":"title","value":"2019-nCoV"}]}]} + {"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]}, + 
{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]}, + {"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCoV"}]}]} opendoar____::8b6dd7db9af49e67306feb59a8bdc52c - {"criteria":[{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"COVID-19"}]}, - {"constraint":[{"verb":"contains_ignorecase","field":"title","value":"SARS-CoV-2"}]}, - {"constraint":[{"verb":"contains_ignorecase","field":"title","value":"2019-nCoV"}]}]} + {"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]}, + {"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]}, + {"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCoV"}]}]} share_______::4719356ec8d7d55d3feb384ce879ad6c - {"criteria":[{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"COVID-19"}]}, - {"constraint":[{"verb":"contains_ignorecase","field":"title","value":"SARS-CoV-2"}]}, - {"constraint":[{"verb":"contains_ignorecase","field":"title","value":"2019-nCoV"}]}]} + {"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]}, + {"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]}, + {"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCoV"}]}]} share_______::bbd802baad85d1fd440f32a7a3a2c2b1 - {"criteria":[{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"COVID-19"}]}, - {"constraint":[{"verb":"contains_ignorecase","field":"title","value":"SARS-CoV-2"}]}, - {"constraint":[{"verb":"contains_ignorecase","field":"title","value":"2019-nCoV"}]}]} + {"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]}, + {"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]}, + 
{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCoV"}]}]} opendoar____::6f4922f45568161a8cdf4ad2299f6d23 - {"criteria":[{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"COVID-19"}]}, - {"constraint":[{"verb":"contains_ignorecase","field":"title","value":"SARS-CoV-2"}]}, - {"constraint":[{"verb":"contains_ignorecase","field":"title","value":"2019-nCoV"}]}]} + {"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]}, + {"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]}, + {"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCoV"}]}]} re3data_____::7980778c78fb4cf0fab13ce2159030dc - {"criteria":[{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"SARS-CoV-2"}]},{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"COVID-19"}]},{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"2019-nCov"}]}]} + {"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]},{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]},{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCov"}]}]} re3data_____::978378def740bbf2bfb420de868c460b - {"criteria":[{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"SARS-CoV-2"}]},{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"COVID-19"}]},{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"2019-nCov"}]}]} + {"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]},{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]},{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCov"}]}]} From 90c8f9cb6178cc7d257d4f3000e48d734a6c2b5d Mon Sep 17 00:00:00 2001 From: Alessia Bardi Date: 
Wed, 23 Nov 2022 12:18:44 +0100 Subject: [PATCH 28/55] tests for EOSC Future --- .../provision/IndexRecordTransformerTest.java | 14 + .../eosc-future/software-justthink-claim.xml | 305 +++++++++++++ .../eosc-future/software-justthink.xml | 429 ++++++++++++++++++ 3 files changed, 748 insertions(+) create mode 100644 dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/software-justthink-claim.xml create mode 100644 dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/software-justthink.xml diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java index e0fbb2a2f..17c3cdb30 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java @@ -128,6 +128,20 @@ public class IndexRecordTransformerTest { testRecordTransformation(record); } + @Test + public void testForEOSCFutureSoftwareNotebook() throws IOException, TransformerException { + final String record = IOUtils + .toString(getClass().getResourceAsStream("eosc-future/software-justthink.xml")); + testRecordTransformation(record); + } + + @Test + public void testForEOSCFutureSoftwareNotebookClaim() throws IOException, TransformerException { + final String record = IOUtils + .toString(getClass().getResourceAsStream("eosc-future/software-justthink-claim.xml")); + testRecordTransformation(record); + } + @Test void testDoiUrlNormalization() throws MalformedURLException { diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/software-justthink-claim.xml 
b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/software-justthink-claim.xml new file mode 100644 index 000000000..02089bb30 --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/software-justthink-claim.xml @@ -0,0 +1,305 @@ + + +
+ od______2659::3801993ea8f970cfc991277160edf277 + 2022-08-08T03:06:13Z + under curation + +
+ + + + JUSThink + Alignment Analysis + Norman, Utku + Dinkar, Tanvi + Bruno, Barbara + Clavel, Chloé + + + + +

+ 1. Description +

+

This repository contains tools to automatically analyse how + participants align their use of task-specific referents in their + dialogue and actions for a collaborative learning activity, and how + it relates to the task success (i.e. their learning + outcomes and task performance).

+

As a use case, it processes data from a collaborative problem solving + activity named JUSThink [1, 2], i.e. + JUSThink Dialogue and Actions Corpus data set that is available from the + Zenodo Repository, DOI: 10.5281/zenodo.4627104, and reproduces the results and figures + in [3].

+

In brief:

+
    +
  1. JUSThink Dialogue and Actions Corpus contains + transcripts, event logs, and test responses of children aged 9 + through 12, as they participate in the JUSThink activity [1, 2] + in pairs of two, to solve a problem on graphs together.
  2. +
  3. The JUSThink activity and its study is first + described in [1], and elaborated with findings concerning the link + between children's learning, performance in the activity, and + perception of self, the other and the robot in [2].
  4. +
  5. Alignment analysis in our work [3] studies the participants' use of + expressions that are related to the task at hand, their follow up + actions of these expressions, and how it links to task success.
  6. +
+

+ 2. Publications +

+

If you use this work in an academic context, please cite the following + publications:

+
    +
  • +

    Norman*, U., Dinkar*, T., Bruno, B., & Clavel, C. (2022). + Studying Alignment in a Collaborative Learning Activity via + Automatic Methods: The Link Between What We Say and Do. Dialogue + & Discourse, 13(2), 1–48. *Contributed equally to this + work. https://doi.org/10.5210/dad.2022.201

    +
  • +
  • +

    Norman, U., Dinkar, T., Bruno, B., & Clavel, C. (2021). + JUSThink Alignment Analysis. In Dialogue & Discourse + (v1.0.0, Vol. 13, Number 2, pp. 1–48). Zenodo. https://doi.org/10.5281/zenodo.4675070

    +
  • +
+

+ 3. Content +

+

The tools provided in this repository consists of 7 Jupyter Notebooks + written in Python 3, and two additional external tools utilised by the + notebooks.

+

+ 3.1. Jupyter Notebooks +

+

We highlight that the notebooks up until the last (i.e. to test the + hypotheses (tools/7_test_the_hypotheses.ipynb)) present a general + pipeline to process event logs, test responses and transcripts to + extract measures of task performance, learning outcomes, and measures of + alignment.

+
    +
  1. Extract task performance (and other features) from the logs + (tools/1_extract_performance_and_other_features_from_logs.ipynb): + Extracts various measures of task behaviour from the logs, at + varying granularities of the activity (i.e. the whole corpus, task, + attempt, and turn levels). In later notebooks, we focus on one of + the features to estimate the task performance of a team: (minimum) + error.
  2. +
  3. Extract learning outcomes from the test responses + (tools/2_extract_learning_gain_from_test_responses.ipynb): Extracts + measures of learning outcomes from the responses to the pre-test and + the post-test. In later notebooks, we focus on one of the features + to estimate the learning outcome of a team: relative learning gain + [4]
  4. +
  5. Select and visualise a subset of teams for + transcription + (tools/3_visualise_transcribed_teams.ipynb): Visualises the + transcribed teams among the other teams in the feature space spanned + by task performance and learning outcome, as well as the + distribution of their number of attempts and turns.
  6. +
  7. Extract routines from transcripts + (tools/4_extract_routines_from_transcripts.ipynb) (uses dialign to + extract routines): Extracts routines of referring expressions that + are "fixed", i.e. become shared or established amongst + interlocutors.
  8. +
  9. Combine transcripts with logs + (tools/5_construct_the_corpus_by_combining_transcripts_with_logs.ipynb): + Merges transcripts with event logs to have a combined dialogue and + actions corpus, to be processed e.g. to detect follow-up + actions.
  10. +
  11. Recognise instructions and detect follow-up actions + (tools/6_recognise_instructions_detect_follow-up_actions.ipynb): + Extracts verbalised instruction such as "connect Mount Basel to + Montreux", and pairs them with the follow-up action that may + match (e.g. if the other connects Basel to Montreux) or + mismatch (e.g. if the other connects Basel to + Neuchatel) with the instruction.
  12. +
  13. Test the hypotheses in [3] (tools/7_test_the_hypotheses.ipynb) (uses + effsize to estimate effect size, specifically + Cliff's Delta): Considers each research questions and hypotheses + studied in [3] and generates the results in [3].
  14. +
+

+ 3.2. External Tools +

+
    +
  1. dialign + tool to extract routines, specifically Release 1.0 from dialign-1.0.zip:\n It extracts routine expressions that are + "shared" among the participants from transcripts. \n It is + used as an external module (in accordance with its CeCILL-B License, + see License).
  2. +
  3. effsize tool to compute estimators of effect + size.\n We specifically use it to compute Cliff's Delta, which + quantifies the amount difference between two groups of observations, + by computing the Cliff's Delta statistic.\n It is taken from + project DABEST (see License).
  4. +
+

+ 4. Research Questions and Hypotheses in [3] +

+
    +
  • RQ1 Lexical alignment: How do the interlocutors + use expressions related to the task? Is this associated + with task success?
      +
    • H1.1: Task-specific referents become + routine early for more successful teams.
    • +
    • H1.2: Hesitation phenomena are more likely + to occur in the vicinity of priming and establishment of + task-specific referents for more successful teams.
    • +
    +
  • +
  • RQ2 Behavioural alignment: How do the interlocutors + follow up these expressions with actions? Is this + associated with task success?
      +
    • H2.1: Instructions are more likely to be + followed by a corresponding action early in the dialogue for + more successful teams.
    • +
    • H2.2: When instructions are followed by a + corresponding or a different action, the action is more + likely to be in the vicinity of information management + phenomena for more successful teams.
    • +
    +
  • +
+

The RQs and Hs are addressed in the notebook for testing the hypotheses + (i.e. tools/7_test_the_hypotheses.ipynb).

+

+ Acknowledgements +

+

This project has received funding from the European Union's Horizon + 2020 research and innovation programme under grant agreement No 765955. + Namely, the ANIMATAS Project.

+

+ License +

+

The whole package is under MIT License, see the LICENSE + file.

+

Classes under the tools/effsize package were taken from + project DABEST, Copyright 2016-2020 Joses W. Ho. + These classes are licensed under the BSD 3-Clause Clear License. See + tools/effsize/LICENSE file for additional + details.

+

Classes under the tools/dialign-1.0 package were taken + from project dialign. These classes are licensed under the + CeCILL-B License. This package is used as an "external + module", see tools/dialign-1.0/LICENSE.txt for + additional details.

+
+ + + + Zenodo + + + + + + + + + + + + + + + + + + + oai:zenodo.org:4675070 + + oai:zenodo.org:4675070 + 10.5281/zenodo.4675070 + + + + false + false + 0.9 + + + + + + corda__h2020::c4515ebef538a734cf11f795347f5dac + 765955 + ANIMATAS + Advancing intuitive human-machine interaction with human-like + social capabilities for education in schools + + + + ec__________::EC::H2020 + + + + + + + + + + + + + https://zenodo.org/record/4675070 + + + +
+
+
+
+
diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/software-justthink.xml b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/software-justthink.xml new file mode 100644 index 000000000..9c0f4ea7d --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/software-justthink.xml @@ -0,0 +1,429 @@ + + +
+ doi_dedup___::c054151b6a8c4f41c7acf160651a6503 + 2022-10-13T00:15:44+0000 + 2022-10-13T07:44:29.152Z +
+ + + + + + oai:zenodo.org:4675070 + 50|od______2659::3801993ea8f970cfc991277160edf277 + oai:zenodo.org:6974562 + 50|od______2659::9c87ff4a5e7710052b873088e7265072 + 10.5281/zenodo.4675069 + 10.5281/zenodo.4675070 + 10.5281/zenodo.6974562 + 10.5281/zenodo.4675069 + + + + + + JUSThink Alignment + Analysis + + Norman, Utku + Dinkar, Tanvi + Bruno, Barbara + Clavel, Chloé + 2022-08-08 + &lt;strong>1. Description&lt;/strong> This repository + contains&lt;strong> tools to automatically analyse how participants align + their use of task-specific referents in their dialogue and actions for a + collaborative learning activity, and how it relates to the task + success&lt;/strong> (i.e. their learning outcomes and task performance). As + a use case, it processes data from a collaborative problem solving activity + named JUSThink [1, 2], i.e. JUSThink Dialogue and Actions Corpus data set that + is available from the Zenodo Repository, DOI: 10.5281/zenodo.4627104, and + reproduces the results and figures in [3]. In brief: &lt;strong>JUSThink + Dialogue and Actions Corpus&lt;/strong> contains transcripts, event logs, + and test responses of children aged 9 through 12, as they participate in the + JUSThink activity [1, 2] in pairs of two, to solve a problem on graphs together. + &lt;strong>The JUSThink activity and its study&lt;/strong> is first + described in [1], and elaborated with findings concerning the link between + children's learning, performance in the activity, and perception of self, the + other and the robot in [2]. &lt;strong>Alignment analysis in our work + [3]&lt;/strong> studies the participants' use of expressions that are + related to the task at hand, their follow up actions of these expressions, and + how it links to task success. &lt;strong>Changes in Release + v1.1.0:&lt;/strong> updated with the publication information, finalized + paper structure, research questions and hypotheses as in the published article: + U. Norman*&lt;em>, &lt;/em>T. Dinkar*, B. 
Bruno, and C. Clavel, + "Studying Alignment in a Collaborative Learning Activity via Automatic Methods: + The Link Between What We Say and Do," Dialogue &amp;amp; Discourse, 13(2), + 1–48. *Contributed equally to this work. 10.5210/dad.2022.201. + &lt;strong>Full Changelog:&lt;/strong> + https://github.com/chili-epfl/justhink-alignment-analysis/compare/v1.0.0...v1.1.0 + &lt;strong>2. Publications&lt;/strong> If you use this work in an + academic context, please cite the following publications: Norman*, U., Dinkar*, + T., Bruno, B., &amp;amp; Clavel, C. (2022). Studying Alignment in a + Collaborative Learning Activity via Automatic Methods: The Link Between What We + Say and Do. Dialogue &amp;amp; Discourse, 13(2), 1–48. *Contributed equally + to this work. https://doi.org/10.5210/dad.2022.201 Norman, U., Dinkar, T., + Bruno, B., &amp;amp; Clavel, C. (2021). JUSThink Alignment Analysis. In + Dialogue &amp;amp; Discourse (v1.1.0, Vol. 13, Number 2, pp. 1–48). Zenodo. + https://doi.org/10.5281/zenodo.6974562 &lt;strong>3. Content&lt;/strong> + The tools provided in this repository consists of 7 Jupyter Notebooks written in + Python 3, and two additional external tools utilised by the notebooks. + &lt;strong>3.1. Jupyter Notebooks&lt;/strong> We highlight that the + notebooks up until the last (i.e. to test the hypotheses + (tools/7_test_the_hypotheses.ipynb)) present a general pipeline to process event + logs, test responses and transcripts to extract measures of task performance, + learning outcomes, and measures of alignment. &lt;strong>Extract task + performance (and other features) from the logs + &lt;/strong>(tools/1_extract_performance_and_other_features_from_logs.ipynb): + Extracts various measures of task behaviour from the logs, at varying + granularities of the activity (i.e. the whole corpus, task, attempt, and turn + levels). In later notebooks, we focus on one of the features to estimate the + task performance of a team: (minimum) error. 
&lt;strong>Extract learning + outcomes from the test responses&lt;/strong> + (tools/2_extract_learning_gain_from_test_responses.ipynb): Extracts measures of + learning outcomes from the responses to the pre-test and the post-test. In later + notebooks, we focus on one of the features to estimate the learning outcome of a + team: relative learning gain [4] &lt;strong>Select and visualise a subset of + teams for transcription&lt;/strong> + (tools/3_visualise_transcribed_teams.ipynb): Visualises the transcribed teams + among the other teams in the feature space spanned by task performance and + learning outcome, as well as the distribution of their number of attempts and + turns. &lt;strong>Extract routines from transcripts&lt;/strong> + (tools/4_extract_routines_from_transcripts.ipynb) (uses dialign to extract + routines): Extracts routines of referring expressions that are "fixed", i.e. + become shared or established amongst interlocutors. &lt;strong>Combine + transcripts with logs&lt;/strong> + (tools/5_construct_the_corpus_by_combining_transcripts_with_logs.ipynb): Merges + transcripts with event logs to have a combined dialogue and actions corpus, to + be processed e.g. to detect follow-up actions. &lt;strong>Recognise + instructions and detect follow-up actions&lt;/strong> + (tools/6_recognise_instructions_detect_follow-up_actions.ipynb): Extracts + verbalised instruction such as "connect Mount Basel to Montreux", and pairs them + with the follow-up action that may &lt;em>match&lt;/em> (e.g. if the + other connects Basel to Montreux) or &lt;em>mismatch&lt;/em> (e.g. if + the other connects Basel to Neuchatel) with the instruction. &lt;strong>Test + the hypotheses &lt;/strong>in [3] (tools/7_test_the_hypotheses.ipynb) (uses + &lt;strong>effsize&lt;/strong> to estimate effect size, specifically + Cliff's Delta): Considers each research questions and hypotheses studied in [3] + and generates the results in [3]. &lt;strong>3.2. 
External + Tools&lt;/strong> &lt;strong>dialign tool&lt;/strong> to extract + routines, specifically Release 1.0 from dialign-1.0.zip:&lt;br> It extracts + routine expressions that are "shared" among the participants from transcripts. + &lt;br> It is used as an external module (in accordance with its CeCILL-B + License, see &lt;strong>License&lt;/strong>). &lt;strong>effsize + tool&lt;/strong> to compute estimators of effect size.&lt;br> We + specifically use it to compute Cliff's Delta, which quantifies the amount + difference between two groups of observations, by computing the Cliff's Delta + statistic.&lt;br> It is taken from project DABEST (see + &lt;strong>License&lt;/strong>). &lt;strong>4. Research Questions + and Hypotheses in [3]&lt;/strong> &lt;strong>RQ1 Lexical + alignment&lt;/strong>: How do the interlocutors &lt;em>use&lt;/em> + expressions related to the task? Is this associated with task success? + &lt;strong>H1.1&lt;/strong>: Task-specific referents become routine + early for more successful teams. &lt;strong>H1.2&lt;/strong>: Hesitation + phenomena are more likely to occur in the vicinity of priming and establishment + of task-specific referents for more successful teams. &lt;strong>RQ2 + Behavioural alignment&lt;/strong>: How do the interlocutors + &lt;em>follow up&lt;/em> these expressions with actions? Is this + associated with task success? &lt;strong>H2.1&lt;/strong>: Instructions + are more likely to be followed by a corresponding action early in the dialogue + for more successful teams. &lt;strong>H2.2&lt;/strong>: When + instructions are followed by a corresponding or a different action, the action + is more likely to be in the vicinity of information management phenomena for + more successful teams. The RQs and Hs are addressed in the notebook for testing + the hypotheses (i.e. tools/7_test_the_hypotheses.ipynb). 
+ &lt;strong>Acknowledgements&lt;/strong> This project has received + funding from the European Union's Horizon 2020 research and innovation programme + under grant agreement No 765955. Namely, the ANIMATAS Project. + &lt;strong>License&lt;/strong> The whole package is under MIT License, + see the &lt;strong>LICENSE&lt;/strong> file. Classes under the + &lt;strong>tools/effsize&lt;/strong> package were taken from project + &lt;strong>DABEST&lt;/strong>, Copyright 2016-2020 Joses W. Ho. These + classes are licensed under the BSD 3-Clause Clear License. See + &lt;strong>tools/effsize/LICENSE&lt;/strong> file for additional + details. Classes under the &lt;strong>tools/dialign-1.0&lt;/strong> + package were taken from project &lt;strong>dialign&lt;/strong>. These + classes are licensed under the CeCILL-B License. This package is used as an + "external module", see&lt;strong> + tools/dialign-1.0/LICENSE.txt&lt;/strong> for additional + details. + {"references": ["[1] J. Nasir, U. Norman, B. Bruno, and P. Dillenbourg, + \"You Tell, I Do, and We Swap until we Connect All the Gold Mines!,\" ERCIM + News, vol. 2020, no. 120, 2020, [Online]. Available: + https://ercim-news.ercim.eu/en120/special/you-tell-i-do-and-we-swap-until-we-connect-all-the-gold-mines", + "[2] J. Nasir*, U. Norman*, B. Bruno, and P. Dillenbourg, \"When Positive + Perception of the Robot Has No Effect on Learning,\" in 2020 29th IEEE + International Conference on Robot and Human Interactive Communication (RO-MAN), + Aug. 2020, pp. 313\u2013320, doi: 10.1109/RO-MAN47096.2020.9223343", "[3] U. + Norman*, T. Dinkar*, B. Bruno, and C. Clavel, \"Studying Alignment in a + Collaborative Learning Activity via Automatic Methods: The Link Between What We + Say and Do,\" Dialogue &amp;amp; Discourse, vol. 13, no. 2, pp. 1\u201348, + Aug. 2022, doi: 10.5210/dad.2022.201.", "[4] M. Sangin, G. Molinari, M.-A. + N\u00fcssli, and P. 
Dillenbourg, \"Facilitating peer knowledge modeling: Effects + of a knowledge awareness tool on collaborative learning outcomes and + processes,\"\" Computers in Human Behavior, vol. 27, no. 3, pp. 1059\u20131067, + May 2011, doi: 10.1016/j.chb.2010.05.032."]} + alignment + situated + dialogue + collaborative + learning + spontaneous + speech + disfluency + mutual + understanding + + 2021-04-09 + 2022-08-08 + Zenodo + + + + + + + + + + + true + false + 0.8 + dedup-result-decisiontree-v3 + + + + + doi_dedup___::ae235765bbc422195a6c9f632b2d77eb + + 2104.04429 + + arXiv + + 2022-08-05 + Studying + Alignment in a Collaborative Learning Activity via Automatic Methods: + The Link Between What We Say and Do + + + 10.48550/arxiv.2104.04429 + 10.5210/dad.2022.201 + + + corda__h2020::c4515ebef538a734cf11f795347f5dac + Advancing intuitive human-machine interaction with human-like social + capabilities for education in schools + 765955 + + + ec__________::EC::H2020 + ec__________::EC::H2020::MSCA-ITN-ETN + + ANIMATAS + + + doi_dedup___::0a6314b0ed275d915f5b57a259375691 + 2021-03-22 + Zenodo + 10.5281/zenodo.4627104 + JUSThink Dialogue and Actions Corpus + 10.5281/zenodo.4627103 + + + + + + + Zenodo + 10.5281/zenodo.4675070 + JUSThink Alignment Analysis + 2021-04-09 + + + + 2022-08-08 + Zenodo + 10.5281/zenodo.6974562 + + JUSThink Alignment Analysis (v1.1.0) + + + JUSThink + Alignment Analysis (v1.1.0) + 2022-08-08 + Zenodo + 10.5281/zenodo.4675069 + + + + + + + 2022-08-08 + + 10.5281/zenodo.4675069 + + https://opensource.org/licenses/MIT + + https://doi.org/10.5281/zenodo.4675069 + + + + + + + 2022-08-08 + + 10.5281/zenodo.6974562 + + https://opensource.org/licenses/MIT + + https://doi.org/10.5281/zenodo.6974562 + + + + + + + 2021-04-09 + + 10.5281/zenodo.4675070 + + https://opensource.org/licenses/MIT + + https://doi.org/10.5281/zenodo.4675070 + + + + + + +
+
From 24ef301cc1d962cd2c613be6ad23e6634ebaa4f4 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Mon, 28 Nov 2022 09:53:23 +0100 Subject: [PATCH 29/55] [graph cleaning] patch the result's collectedfrom and hostedby identifiers according to the datasource master-duplicate mapping --- .../ReadDatasourceMasterDuplicateFromDB.java | 76 +++++++ .../common/action/model/MasterDuplicate.java | 29 +++ .../oa/graph/clean/MasterDuplicateAction.java | 45 ++++ .../graph/clean/cfhb/CleanCfHbSparkJob.java | 207 ++++++++++++++++++ .../oa/graph/clean/cfhb/IdCfHbMapping.java | 44 ++++ .../dhp/oa/graph/clean/oozie_app/workflow.xml | 202 ++++++++++++++++- .../oa/graph/datasourcemaster_parameters.json | 32 +++ .../oa/graph/input_clean_cfhb_parameters.json | 32 +++ .../dnetlib/dhp/oa/graph/raw/MappersTest.java | 3 +- .../dhp/oa/provision/EOSCFuture_Test.java | 105 ++++----- 10 files changed, 719 insertions(+), 56 deletions(-) create mode 100644 dhp-common/src/main/java/eu/dnetlib/dhp/common/action/ReadDatasourceMasterDuplicateFromDB.java create mode 100644 dhp-common/src/main/java/eu/dnetlib/dhp/common/action/model/MasterDuplicate.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/MasterDuplicateAction.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/cfhb/CleanCfHbSparkJob.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/cfhb/IdCfHbMapping.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/datasourcemaster_parameters.json create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/input_clean_cfhb_parameters.json diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/action/ReadDatasourceMasterDuplicateFromDB.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/action/ReadDatasourceMasterDuplicateFromDB.java new file mode 100644 index 000000000..d9e8ced85 
--- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/action/ReadDatasourceMasterDuplicateFromDB.java @@ -0,0 +1,79 @@ + +package eu.dnetlib.dhp.common.action; + +import java.io.BufferedWriter; +import java.io.IOException; +import java.io.OutputStreamWriter; +import java.nio.charset.StandardCharsets; +import java.sql.ResultSet; +import java.sql.SQLException; + +import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.common.DbClient; +import eu.dnetlib.dhp.common.action.model.MasterDuplicate; +import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; + +public class ReadDatasourceMasterDuplicateFromDB { + + private static final Logger log = LoggerFactory.getLogger(ReadDatasourceMasterDuplicateFromDB.class); + + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + private static final String QUERY = "SELECT id as master, duplicate FROM dsm_dedup_services;"; + + public static int execute(String dbUrl, String dbUser, String dbPassword, String hdfsPath, String hdfsNameNode) + throws IOException { + final java.util.concurrent.atomic.AtomicInteger count = new java.util.concurrent.atomic.AtomicInteger(); + try (DbClient dbClient = new DbClient(dbUrl, dbUser, dbPassword)) { + Configuration conf = new Configuration(); + conf.set("fs.defaultFS", hdfsNameNode); + FileSystem fileSystem = FileSystem.get(conf); + FSDataOutputStream fos = fileSystem.create(new Path(hdfsPath)); + + log.info("running query: {}", QUERY); + log.info("storing results in: {}", hdfsPath); + + try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(fos, StandardCharsets.UTF_8))) { + // count each row handed to the callback, not the single processResults() invocation + dbClient.processResults(QUERY, rs -> { + writeMap(datasourceMasterMap(rs), writer); + count.incrementAndGet(); + }); + } + } + return count.get(); + } + + private static MasterDuplicate
datasourceMasterMap(ResultSet rs) { + try { + MasterDuplicate md = new MasterDuplicate(); + final String master = rs.getString("master"); + final String duplicate = rs.getString("duplicate"); + md.setMaster(OafMapperUtils.createOpenaireId(10, master, true)); + md.setDuplicate(OafMapperUtils.createOpenaireId(10, duplicate, true)); + + return md; + } catch (final SQLException e) { + throw new RuntimeException(e); + } + } + + private static void writeMap(final MasterDuplicate dm, final BufferedWriter writer) { + try { + writer.write(OBJECT_MAPPER.writeValueAsString(dm)); + writer.newLine(); + } catch (final IOException e) { + throw new RuntimeException(e); + } + } + +} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/action/model/MasterDuplicate.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/action/model/MasterDuplicate.java new file mode 100644 index 000000000..b3e0d2aaa --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/action/model/MasterDuplicate.java @@ -0,0 +1,29 @@ + +package eu.dnetlib.dhp.common.action.model; + +import java.io.Serializable; + +/** + * @author miriam.baglioni + * @Date 21/07/22 + */ +public class MasterDuplicate implements Serializable { + private String duplicate; + private String master; + + public String getDuplicate() { + return duplicate; + } + + public void setDuplicate(String duplicate) { + this.duplicate = duplicate; + } + + public String getMaster() { + return master; + } + + public void setMaster(String master) { + this.master = master; + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/MasterDuplicateAction.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/MasterDuplicateAction.java new file mode 100644 index 000000000..8bf36ff82 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/MasterDuplicateAction.java @@ -0,0 +1,45 @@ + +package eu.dnetlib.dhp.oa.graph.clean; + +import 
org.apache.commons.io.IOUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.common.action.ReadDatasourceMasterDuplicateFromDB; + +public class MasterDuplicateAction { + + private static final Logger log = LoggerFactory.getLogger(MasterDuplicateAction.class); + + public static void main(final String[] args) throws Exception { + final ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + MasterDuplicateAction.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/graph/datasourcemaster_parameters.json"))); + + parser.parseArgument(args); + + final String dbUrl = parser.get("postgresUrl"); + log.info("postgresUrl: {}", dbUrl); + + final String dbUser = parser.get("postgresUser"); + log.info("postgresUser: {}", dbUser); + + final String dbPassword = parser.get("postgresPassword"); + log.info("postgresPassword: xxx"); // never write the credential itself to the job logs + + final String hdfsPath = parser.get("hdfsPath"); + log.info("hdfsPath: {}", hdfsPath); + + final String hdfsNameNode = parser.get("hdfsNameNode"); + log.info("hdfsNameNode: {}", hdfsNameNode); + + int rows = ReadDatasourceMasterDuplicateFromDB.execute(dbUrl, dbUser, dbPassword, hdfsPath, hdfsNameNode); + + log.info("written {} rows", rows); + } + +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/cfhb/CleanCfHbSparkJob.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/cfhb/CleanCfHbSparkJob.java new file mode 100644 index 000000000..ad7e252f6 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/cfhb/CleanCfHbSparkJob.java @@ -0,0 +1,207 @@ + +package eu.dnetlib.dhp.oa.graph.clean.cfhb; + +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; + +import java.util.Objects; +import java.util.Optional; +import java.util.stream.Stream; + +import org.apache.commons.io.IOUtils;
+import org.apache.commons.lang3.StringUtils; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.function.FlatMapFunction; +import org.apache.spark.api.java.function.MapFunction; +import org.apache.spark.sql.*; +import org.apache.spark.sql.expressions.Aggregator; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.common.action.model.MasterDuplicate; +import eu.dnetlib.dhp.oa.graph.clean.country.CleanCountrySparkJob; +import eu.dnetlib.dhp.schema.oaf.Instance; +import eu.dnetlib.dhp.schema.oaf.KeyValue; +import eu.dnetlib.dhp.schema.oaf.Result; +import scala.Tuple2; + +public class CleanCfHbSparkJob { + + private static final Logger log = LoggerFactory.getLogger(CleanCfHbSparkJob.class); + + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + public static void main(String[] args) throws Exception { + + String jsonConfiguration = IOUtils + .toString( + CleanCountrySparkJob.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/graph/input_clean_cfhb_parameters.json")); + final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); + parser.parseArgument(args); + + Boolean isSparkSessionManaged = Optional + .ofNullable(parser.get("isSparkSessionManaged")) + .map(Boolean::valueOf) + .orElse(Boolean.TRUE); + log.info("isSparkSessionManaged: {}", isSparkSessionManaged); + + String inputPath = parser.get("inputPath"); + log.info("inputPath: {}", inputPath); + + String workingPath = parser.get("workingPath"); + log.info("workingPath: {}", workingPath); + + String outputPath = parser.get("outputPath"); + log.info("outputPath: {}", outputPath); + + String masterDuplicatePath = parser.get("masterDuplicatePath"); + log.info("masterDuplicatePath: {}", masterDuplicatePath); + + String graphTableClassName = parser.get("graphTableClassName"); + 
log.info("graphTableClassName: {}", graphTableClassName); + + Class entityClazz = (Class) Class.forName(graphTableClassName); + + SparkConf conf = new SparkConf(); + runWithSparkSession( + conf, + isSparkSessionManaged, + spark -> { + cleanCfHb( + spark, inputPath, entityClazz, workingPath, masterDuplicatePath, outputPath); + }); + } + + private static void cleanCfHb(SparkSession spark, String inputPath, Class entityClazz, + String workingPath, String masterDuplicatePath, String outputPath) { + // read the master-duplicate tuples + Dataset md = spark + .read() + .textFile(masterDuplicatePath) + .map(as(MasterDuplicate.class), Encoders.bean(MasterDuplicate.class)); + + // read the result table + Dataset res = spark + .read() + .textFile(inputPath) + .map(as(entityClazz), Encoders.bean(entityClazz)); + + // prepare the resolved CF|HB references with the corresponding EMPTY master ID + Dataset resolved = res + .flatMap( + (FlatMapFunction) r -> Stream + .concat( + r.getCollectedfrom().stream().map(KeyValue::getKey), + Stream + .concat( + r.getInstance().stream().map(Instance::getHostedby).map(KeyValue::getKey), + r.getInstance().stream().map(Instance::getCollectedfrom).map(KeyValue::getKey))) + .distinct() + .map(s -> asIdCfHbMapping(r.getId(), s)) + .iterator(), + Encoders.bean(IdCfHbMapping.class)); + + final String resolvedPath = workingPath + "/cfHbResolved"; + + // set the EMPTY master ID and save it aside + resolved + .joinWith(md, resolved.col("cfhb").equalTo(md.col("duplicate"))) + .map((MapFunction, IdCfHbMapping>) t -> { + t._1().setMaster(t._2().getMaster()); + return t._1(); + }, Encoders.bean(IdCfHbMapping.class)) + .write() + .mode(SaveMode.Overwrite) + .parquet(resolvedPath); + + // read again the resolved CF|HB mapping + Dataset resolvedDS = spark + .read() + .load(resolvedPath) + .as(Encoders.bean(IdCfHbMapping.class)); + + // Join the results with the resolved CF|HB mapping, apply the mapping and save it + res + .joinWith(resolvedDS, 
res.col("id").equalTo(resolvedDS.col("resultid")), "left") + .groupByKey((MapFunction, String>) t -> t._1().getId(), Encoders.STRING()) + .agg(new IdCfHbMappingAggregator(entityClazz).toColumn()) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(outputPath); + } + + public static class IdCfHbMappingAggregator extends Aggregator { + + private final Class entityClazz; + + public IdCfHbMappingAggregator(Class entityClazz) { + this.entityClazz = entityClazz; + } + + @Override + public T zero() { + try { + return entityClazz.newInstance(); + } catch (InstantiationException | IllegalAccessException e) { + throw new RuntimeException(e); + } + } + + @Override + public T reduce(T r, IdCfHbMapping a) { + if (Objects.isNull(a) && StringUtils.isBlank(a.getMaster())) { + return r; + } + r.getCollectedfrom().forEach(kv -> updateKey(kv, a)); + r.getInstance().forEach(i -> { + updateKey(i.getHostedby(), a); + updateKey(i.getCollectedfrom(), a); + }); + return r; + } + + @Override + public T merge(T b1, T b2) { + if (Objects.isNull(b1.getId())) { + return b2; + } + return b1; + } + + @Override + public T finish(T r) { + return r; + } + + private void updateKey(final KeyValue kv, final IdCfHbMapping a) { + if (kv.getKey().equals(a.getCfhb())) { + kv.setKey(a.getMaster()); + } + } + + @Override + public Encoder bufferEncoder() { + return Encoders.bean(entityClazz); + } + + @Override + public Encoder outputEncoder() { + return Encoders.bean(entityClazz); + } + } + + private static IdCfHbMapping asIdCfHbMapping(String resultId, String cfHb) { + IdCfHbMapping m = new IdCfHbMapping(resultId); + m.setCfhb(cfHb); + return m; + } + + private static MapFunction as(Class clazz) { + return s -> OBJECT_MAPPER.readValue(s, clazz); + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/cfhb/IdCfHbMapping.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/cfhb/IdCfHbMapping.java new file mode 100644
index 000000000..16d1a2613 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/cfhb/IdCfHbMapping.java @@ -0,0 +1,44 @@ + +package eu.dnetlib.dhp.oa.graph.clean.cfhb; + +import java.io.Serializable; + +public class IdCfHbMapping implements Serializable { + + private String resultid; + + private String cfhb; + + private String master; + + public IdCfHbMapping() { + } + + public IdCfHbMapping(String id) { + this.resultid = id; + } + + public String getResultid() { + return resultid; + } + + public void setResultid(String resultid) { + this.resultid = resultid; + } + + public String getCfhb() { + return cfhb; + } + + public void setCfhb(String cfhb) { + this.cfhb = cfhb; + } + + public String getMaster() { + return master; + } + + public void setMaster(String master) { + this.master = master; + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/clean/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/clean/oozie_app/workflow.xml index 6435d5131..e717fac0f 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/clean/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/clean/oozie_app/workflow.xml @@ -317,13 +317,13 @@ - + yarn @@ -434,7 +434,6 @@ - yarn @@ -460,13 +459,13 @@ - + yarn @@ -583,7 +582,202 @@ - + + + + + ${wf:conf('shouldClean') eq true} + + + + + + + eu.dnetlib.dhp.oa.graph.clean.MasterDuplicateAction + --postgresUrl${postgresURL} + --postgresUser${postgresUser} + --postgresPassword${postgresPassword} + --hdfsPath${workingDir}/masterduplicate + --hdfsNameNode${nameNode} + + + + + + + + + + + + + + + yarn + cluster + patch publication cfhb + eu.dnetlib.dhp.oa.graph.clean.cfhb.CleanCfHbSparkJob + dhp-graph-mapper-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + 
--driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.shuffle.partitions=7680 + + --inputPath${graphOutputPath}/publication + --outputPath${workingPath}/cfHbPatched/publication + --graphTableClassNameeu.dnetlib.dhp.schema.oaf.Publication + --workingDir${workingDir}/working/publication + --masterDuplicatePath${workingDir}/masterduplicate + + + + + + + + yarn + cluster + patch dataset cfhb + eu.dnetlib.dhp.oa.graph.clean.cfhb.CleanCfHbSparkJob + dhp-graph-mapper-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.shuffle.partitions=7680 + + --inputPath${graphOutputPath}/dataset + --outputPath${workingPath}/cfHbPatched/dataset + --graphTableClassNameeu.dnetlib.dhp.schema.oaf.Dataset + --workingDir${workingDir}/working/dataset + --masterDuplicatePath${workingDir}/masterduplicate + + + + + + + + yarn + cluster + patch otherresearchproduct cfhb + eu.dnetlib.dhp.oa.graph.clean.cfhb.CleanCfHbSparkJob + dhp-graph-mapper-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + 
--conf spark.sql.shuffle.partitions=7680 + + --inputPath${graphOutputPath}/otherresearchproduct + --outputPath${workingPath}/cfHbPatched/otherresearchproduct + --graphTableClassNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct + --workingDir${workingDir}/working/otherresearchproduct + --masterDuplicatePath${workingDir}/masterduplicate + + + + + + + + yarn + cluster + patch software cfhb + eu.dnetlib.dhp.oa.graph.clean.cfhb.CleanCfHbSparkJob + dhp-graph-mapper-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.shuffle.partitions=7680 + + --inputPath${graphOutputPath}/software + --outputPath${workingPath}/cfHbPatched/software + --graphTableClassNameeu.dnetlib.dhp.schema.oaf.Software + --workingDir${workingDir}/working/software + --masterDuplicatePath${workingDir}/masterduplicate + + + + + + + + + + + + + + + + + + + + ${workingPath}/cfHbPatched/publication + ${graphOutputPath}/publication + + + + + + + + + + + ${workingPath}/cfHbPatched/dataset + ${graphOutputPath}/dataset + + + + + + + + + + + ${workingPath}/cfHbPatched/otherresearchproduct + ${graphOutputPath}/otherresearchproduct + + + + + + + + + + + ${workingPath}/cfHbPatched/software + ${graphOutputPath}/software + + + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/datasourcemaster_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/datasourcemaster_parameters.json new file mode 100644 index 000000000..fbe2cca10 --- /dev/null +++ 
b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/datasourcemaster_parameters.json @@ -0,0 +1,32 @@ +[ + { + "paramName": "pu", + "paramLongName": "postgresUrl", + "paramDescription": "the jdbc url to the postgres", + "paramRequired": true + }, + { + "paramName": "uid", + "paramLongName": "postgresUser", + "paramDescription": "the postgres user", + "paramRequired": true + }, + { + "paramName": "pwd", + "paramLongName": "postgresPassword", + "paramDescription": "the postgres password", + "paramRequired": true + }, + { + "paramName": "p", + "paramLongName": "hdfsPath", + "paramDescription": "the target path on HDFS", + "paramRequired": true + }, + { + "paramName": "nn", + "paramLongName": "hdfsNameNode", + "paramDescription": "the HDFS nameNode", + "paramRequired": true + } +] \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/input_clean_cfhb_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/input_clean_cfhb_parameters.json new file mode 100644 index 000000000..8b8a5f70e --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/input_clean_cfhb_parameters.json @@ -0,0 +1,32 @@ +[ + { + "paramName": "issm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "when true will stop SparkSession after job execution", + "paramRequired": false + }, + { + "paramName": "in", + "paramLongName": "inputPath", + "paramDescription": "the path to the graph data dump to read", + "paramRequired": true + }, + { + "paramName": "out", + "paramLongName": "outputPath", + "paramDescription": "the path to store the output graph", + "paramRequired": true + }, + { + "paramName": "class", + "paramLongName": "graphTableClassName", + "paramDescription": "class name modelling the graph table", + "paramRequired": true + }, + { + "paramName": "md", + "paramLongName": "datasourceMasterDuplicate", + "paramDescription":
"path to the file on HDFS holding the datasource id tuples [master, duplicate]", + "paramRequired": true + } +] diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java index 3e35021c8..ad6ceef54 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java @@ -1002,7 +1002,8 @@ class MappersTest { @Test void testEOSCFuture_ROHub() throws IOException { - final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("photic-zone-transformed.xml"))); + final String xml = IOUtils + .toString(Objects.requireNonNull(getClass().getResourceAsStream("photic-zone-transformed.xml"))); final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); final OtherResearchProduct rocrate = (OtherResearchProduct) list.get(0); assertNotNull(rocrate.getEoscifguidelines()); diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/EOSCFuture_Test.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/EOSCFuture_Test.java index 08bf19fe4..3e1a501d1 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/EOSCFuture_Test.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/EOSCFuture_Test.java @@ -1,13 +1,14 @@ + package eu.dnetlib.dhp.oa.provision; -import com.fasterxml.jackson.databind.DeserializationFeature; -import com.fasterxml.jackson.databind.ObjectMapper; -import eu.dnetlib.dhp.oa.provision.model.JoinedEntity; -import eu.dnetlib.dhp.oa.provision.utils.ContextMapper; -import eu.dnetlib.dhp.oa.provision.utils.StreamingInputDocumentFactory; -import eu.dnetlib.dhp.oa.provision.utils.XmlRecordFactory; -import 
eu.dnetlib.dhp.schema.oaf.OtherResearchProduct; -import eu.dnetlib.dhp.utils.saxon.SaxonTransformerFactory; +import static org.junit.jupiter.api.Assertions.assertNotNull; + +import java.io.IOException; +import java.io.StringReader; + +import javax.xml.transform.Transformer; +import javax.xml.transform.TransformerException; + import org.apache.commons.io.IOUtils; import org.apache.solr.client.solrj.util.ClientUtils; import org.apache.solr.common.SolrInputDocument; @@ -18,71 +19,73 @@ import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; -import javax.xml.transform.Transformer; -import javax.xml.transform.TransformerException; -import java.io.IOException; -import java.io.StringReader; +import com.fasterxml.jackson.databind.DeserializationFeature; +import com.fasterxml.jackson.databind.ObjectMapper; -import static org.junit.jupiter.api.Assertions.assertNotNull; +import eu.dnetlib.dhp.oa.provision.model.JoinedEntity; +import eu.dnetlib.dhp.oa.provision.utils.ContextMapper; +import eu.dnetlib.dhp.oa.provision.utils.StreamingInputDocumentFactory; +import eu.dnetlib.dhp.oa.provision.utils.XmlRecordFactory; +import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct; +import eu.dnetlib.dhp.utils.saxon.SaxonTransformerFactory; public class EOSCFuture_Test { - public static ObjectMapper OBJECT_MAPPER = new ObjectMapper() - .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); + public static ObjectMapper OBJECT_MAPPER = new ObjectMapper() + .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); - public static final String VERSION = "2021-04-15T10:05:53Z"; - public static final String DSID = "b9ee796a-c49f-4473-a708-e7d67b84c16d_SW5kZXhEU1Jlc291cmNlcy9JbmRleERTUmVzb3VyY2VUeXBl"; + public static final String VERSION = "2021-04-15T10:05:53Z"; + public static final String DSID = "b9ee796a-c49f-4473-a708-e7d67b84c16d_SW5kZXhEU1Jlc291cmNlcy9JbmRleERTUmVzb3VyY2VUeXBl"; - private ContextMapper 
contextMapper; + private ContextMapper contextMapper; - @BeforeEach - public void setUp() { - contextMapper = new ContextMapper(); - } + @BeforeEach + public void setUp() { + contextMapper = new ContextMapper(); + } + @Test + public void testEOSC_ROHub() throws IOException, DocumentException, TransformerException { - @Test - public void testEOSC_ROHub() throws IOException, DocumentException, TransformerException { + final ContextMapper contextMapper = new ContextMapper(); - final ContextMapper contextMapper = new ContextMapper(); + final XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, + XmlConverterJob.schemaLocation); - final XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, - XmlConverterJob.schemaLocation); + final OtherResearchProduct p = OBJECT_MAPPER + .readValue( + IOUtils.toString(getClass().getResourceAsStream("eosc-future/photic-zone.json")), + OtherResearchProduct.class); - final OtherResearchProduct p = OBJECT_MAPPER - .readValue(IOUtils.toString(getClass().getResourceAsStream("eosc-future/photic-zone.json")), OtherResearchProduct.class); + final String xml = xmlRecordFactory.build(new JoinedEntity<>(p)); - final String xml = xmlRecordFactory.build(new JoinedEntity<>(p)); + assertNotNull(xml); - assertNotNull(xml); + final Document doc = new SAXReader().read(new StringReader(xml)); - final Document doc = new SAXReader().read(new StringReader(xml)); + assertNotNull(doc); + System.out.println(doc.asXML()); - assertNotNull(doc); - System.out.println(doc.asXML()); + testRecordTransformation(xml); + } + private void testRecordTransformation(final String record) throws IOException, TransformerException { + final String fields = IOUtils.toString(getClass().getResourceAsStream("fields.xml")); + final String xslt = IOUtils.toString(getClass().getResourceAsStream("layoutToRecordTransformer.xsl")); - testRecordTransformation(xml); - } + final String transformer = XmlIndexingJob.getLayoutTransformer("DMF", 
fields, xslt); + final Transformer tr = SaxonTransformerFactory.newInstance(transformer); - private void testRecordTransformation(final String record) throws IOException, TransformerException { - final String fields = IOUtils.toString(getClass().getResourceAsStream("fields.xml")); - final String xslt = IOUtils.toString(getClass().getResourceAsStream("layoutToRecordTransformer.xsl")); + final String indexRecordXML = XmlIndexingJob.toIndexRecord(tr, record); - final String transformer = XmlIndexingJob.getLayoutTransformer("DMF", fields, xslt); + final SolrInputDocument solrDoc = new StreamingInputDocumentFactory(VERSION, DSID) + .parseDocument(indexRecordXML); - final Transformer tr = SaxonTransformerFactory.newInstance(transformer); + final String xmlDoc = ClientUtils.toXML(solrDoc); - final String indexRecordXML = XmlIndexingJob.toIndexRecord(tr, record); - - final SolrInputDocument solrDoc = new StreamingInputDocumentFactory(VERSION, DSID) - .parseDocument(indexRecordXML); - - final String xmlDoc = ClientUtils.toXML(solrDoc); - - Assertions.assertNotNull(xmlDoc); - System.out.println(xmlDoc); - } + Assertions.assertNotNull(xmlDoc); + System.out.println(xmlDoc); + } } From 11695ba649f778646ea7811f8888993532230169 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Mon, 28 Nov 2022 10:18:43 +0100 Subject: [PATCH 30/55] [graph cleaning] patch also the result's collectedfrom and hostedby datasource name according to the datasource master-duplicate mapping --- .../ReadDatasourceMasterDuplicateFromDB.java | 19 +++++++----- .../common/action/model/MasterDuplicate.java | 29 ++++++++++++------- .../graph/clean/cfhb/CleanCfHbSparkJob.java | 16 +++++----- .../oa/graph/clean/cfhb/IdCfHbMapping.java | 20 +++++++++---- 4 files changed, 55 insertions(+), 29 deletions(-) diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/action/ReadDatasourceMasterDuplicateFromDB.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/action/ReadDatasourceMasterDuplicateFromDB.java index 
d9e8ced85..5d39216f1 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/action/ReadDatasourceMasterDuplicateFromDB.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/action/ReadDatasourceMasterDuplicateFromDB.java @@ -8,7 +8,6 @@ import java.nio.charset.StandardCharsets; import java.sql.ResultSet; import java.sql.SQLException; -import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; @@ -28,7 +27,9 @@ public class ReadDatasourceMasterDuplicateFromDB { private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - private static final String QUERY = "SELECT id as master, duplicate FROM dsm_dedup_services;"; + private static final String QUERY = "SELECT distinct dd.id as masterId, d.officialname as masterName, dd.duplicate as duplicateId " + + + "FROM dsm_dedup_services dd join dsm_services d on (dd.id = d.id);"; public static int execute(String dbUrl, String dbUser, String dbPassword, String hdfsPath, String hdfsNameNode) throws IOException { @@ -52,11 +53,15 @@ public class ReadDatasourceMasterDuplicateFromDB { private static MasterDuplicate datasourceMasterMap(ResultSet rs) { try { - MasterDuplicate md = new MasterDuplicate(); - final String master = rs.getString("master"); - final String duplicate = rs.getString("duplicate"); - md.setMaster(OafMapperUtils.createOpenaireId(10, master, true)); - md.setDuplicate(OafMapperUtils.createOpenaireId(10, duplicate, true)); + final MasterDuplicate md = new MasterDuplicate(); + + final String duplicateId = rs.getString("duplicateId"); + final String masterId = rs.getString("masterId"); + final String masterName = rs.getString("masterName"); + + md.setDuplicateId(OafMapperUtils.createOpenaireId(10, duplicateId, true)); + md.setMasterId(OafMapperUtils.createOpenaireId(10, masterId, true)); + md.setMasterName(masterName); return md; } catch (final SQLException e) { diff --git 
a/dhp-common/src/main/java/eu/dnetlib/dhp/common/action/model/MasterDuplicate.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/action/model/MasterDuplicate.java index b3e0d2aaa..12a4407c4 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/action/model/MasterDuplicate.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/action/model/MasterDuplicate.java @@ -8,22 +8,31 @@ import java.io.Serializable; * @Date 21/07/22 */ public class MasterDuplicate implements Serializable { - private String duplicate; - private String master; + private String duplicateId; + private String masterId; + private String masterName; - public String getDuplicate() { - return duplicate; + public String getDuplicateId() { + return duplicateId; } - public void setDuplicate(String duplicate) { - this.duplicate = duplicate; + public void setDuplicateId(String duplicateId) { + this.duplicateId = duplicateId; } - public String getMaster() { - return master; + public String getMasterId() { + return masterId; } - public void setMaster(String master) { - this.master = master; + public void setMasterId(String masterId) { + this.masterId = masterId; + } + + public String getMasterName() { + return masterName; + } + + public void setMasterName(String masterName) { + this.masterName = masterName; } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/cfhb/CleanCfHbSparkJob.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/cfhb/CleanCfHbSparkJob.java index ad7e252f6..d35dbc7c1 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/cfhb/CleanCfHbSparkJob.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/cfhb/CleanCfHbSparkJob.java @@ -78,6 +78,7 @@ public class CleanCfHbSparkJob { private static void cleanCfHb(SparkSession spark, String inputPath, Class entityClazz, String workingPath, String masterDuplicatePath, String outputPath) { + // read the 
master-duplicate tuples Dataset md = spark .read() @@ -111,7 +112,8 @@ public class CleanCfHbSparkJob { resolved - .joinWith(md, resolved.col("cfhb").equalTo(md.col("duplicate"))) + .joinWith(md, resolved.col("cfhb").equalTo(md.col("duplicateId"))) .map((MapFunction, IdCfHbMapping>) t -> { - t._1().setMaster(t._2().getMaster()); + t._1().setMasterId(t._2().getMasterId()); + t._1().setMasterName(t._2().getMasterName()); return t._1(); }, Encoders.bean(IdCfHbMapping.class)) .write() @@ -154,13 +156,13 @@ public class CleanCfHbSparkJob { @Override public T reduce(T r, IdCfHbMapping a) { - if (Objects.isNull(a) && StringUtils.isBlank(a.getMaster())) { + if (Objects.isNull(a) || StringUtils.isBlank(a.getMasterId())) { return r; } - r.getCollectedfrom().forEach(kv -> updateKey(kv, a)); + r.getCollectedfrom().forEach(kv -> updateKeyValue(kv, a)); r.getInstance().forEach(i -> { - updateKey(i.getHostedby(), a); - updateKey(i.getCollectedfrom(), a); + updateKeyValue(i.getHostedby(), a); + updateKeyValue(i.getCollectedfrom(), a); }); return r; } @@ -178,9 +180,10 @@ public class CleanCfHbSparkJob { return r; } - private void updateKey(final KeyValue kv, final IdCfHbMapping a) { + private void updateKeyValue(final KeyValue kv, final IdCfHbMapping a) { if (kv.getKey().equals(a.getCfhb())) { - kv.setKey(a.getMaster()); + kv.setKey(a.getMasterId()); + kv.setValue(a.getMasterName()); } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/cfhb/IdCfHbMapping.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/cfhb/IdCfHbMapping.java index 16d1a2613..cb4e1b5e6 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/cfhb/IdCfHbMapping.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/cfhb/IdCfHbMapping.java @@ -9,7 +9,9 @@ public class IdCfHbMapping implements Serializable { private String cfhb; - private String master; + private String masterId; + + private String masterName; public IdCfHbMapping() { } @@ -34,11 +36,19 @@ public class IdCfHbMapping implements
Serializable { this.cfhb = cfhb; } - public String getMaster() { - return master; + public String getMasterId() { + return masterId; } - public void setMaster(String master) { - this.master = master; + public void setMasterId(String masterId) { + this.masterId = masterId; + } + + public String getMasterName() { + return masterName; + } + + public void setMasterName(String masterName) { + this.masterName = masterName; } } From 9c70c5dbd636fc2139943610b5838075f071ebec Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 28 Nov 2022 14:51:20 +0100 Subject: [PATCH 31/55] [Bulk Tag horizontal] added new path in definition of constraint (to recognize fos subjects) - changed test and resource class to test this new aspect --- .../community/QueryInformationSystem.java | 3 + .../dnetlib/dhp/bulktag/BulkTagJobTest.java | 8 +- .../communityconfiguration/tagging_conf.xml | 85 +++++++++++++++++++ .../dataset_10.json | 4 +- 4 files changed, 95 insertions(+), 5 deletions(-) diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/QueryInformationSystem.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/QueryInformationSystem.java index f06c0d47a..ec89edc63 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/QueryInformationSystem.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/QueryInformationSystem.java @@ -26,6 +26,9 @@ public class QueryInformationSystem { + " return " + " " + " { $x//CONFIGURATION/context/@id} " + + " " + + "{$x//CONFIGURATION/context/param[./@name='advancedConstraints']/text() }" + + "" + " " + " {for $y in tokenize($subj,',') " + " return " diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/BulkTagJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/BulkTagJobTest.java index eaf34c824..364e53376 100644 --- 
a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/BulkTagJobTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/BulkTagJobTest.java @@ -46,7 +46,9 @@ public class BulkTagJobTest { + " \"orcid\" : \"$['author'][*]['pid'][*][?(@['key']=='ORCID')]['value']\"," + " \"contributor\" : \"$['contributor'][*]['value']\"," + " \"description\" : \"$['description'][*]['value']\", " - + " \"subject\" :\"$['subject'][*]['value']\" }"; + + " \"subject\" :\"$['subject'][*]['value']\" , " + + + "\"fos\" : \"$['subject'][?(@['qualifier']['classid']=='subject:fos')].value\"} "; private static SparkSession spark; @@ -770,14 +772,14 @@ public class BulkTagJobTest { org.apache.spark.sql.Dataset idExplodeCommunity = spark.sql(query); idExplodeCommunity.show(false); - Assertions.assertEquals(4, idExplodeCommunity.count()); + Assertions.assertEquals(5, idExplodeCommunity.count()); Assertions .assertEquals( 3, idExplodeCommunity.filter("provenance = 'community:datasource'").count()); Assertions .assertEquals( - 1, idExplodeCommunity.filter("provenance = 'community:advconstraint'").count()); + 2, idExplodeCommunity.filter("provenance = 'community:advconstraint'").count()); } } diff --git a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag/communityconfiguration/tagging_conf.xml b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag/communityconfiguration/tagging_conf.xml index 4e580edf5..b2e33df12 100644 --- a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag/communityconfiguration/tagging_conf.xml +++ b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag/communityconfiguration/tagging_conf.xml @@ -844,6 +844,89 @@ + + { + "criteria": [ + { + "constraint": [ + { + "verb": "equals_caseinsensitive", + "field": "subject", + "value": "North America" + }, + { + "verb": "contains", + "field": "fos", + "value": "05" + } + ] + }, + { + "constraint": [ + { + "verb": 
"equals_caseinsensitive", + "field": "subject", + "value": "North America" + }, + { + "verb": "contains", + "field": "fos", + "value": "06" + } + ] + }, + { + "constraint": [ + { + "verb": "equals_caseinsensitive", + "field": "subject", + "value": "Mexico" + }, + { + "verb": "equals_caseinsensitive", + "field": "subject", + "value": "United States" + }, + { + "verb": "equals_caseinsensitive", + "field": "subject", + "value": "Canada" + }, + { + "verb": "contains", + "field": "fos", + "value": "05" + } + ] + }, + { + "constraint": [ + { + "verb": "equals_caseinsensitive", + "field": "subject", + "value": "Mexico" + }, + { + "verb": "equals_caseinsensitive", + "field": "subject", + "value": "United States" + }, + { + "verb": "equals_caseinsensitive", + "field": "subject", + "value": "Canada" + }, + { + "verb": "contains", + "field": "fos", + "value": "06" + } + ] + } + ] + } + + @@ -1174,7 +1257,9 @@
+ + diff --git a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag/sample/dataset/update_datasourcewithconstraints/dataset_10.json b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag/sample/dataset/update_datasourcewithconstraints/dataset_10.json index 168343401..17553eb83 100644 --- a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag/sample/dataset/update_datasourcewithconstraints/dataset_10.json +++ b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag/sample/dataset/update_datasourcewithconstraints/dataset_10.json @@ -6,5 +6,5 @@ {"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1585055869498,"id":"50|od______3989::2f4f3c820c450bd08dac08d07cc82dcf","originalId":["od______3989::2f4f3c820c450bd08dac08d07cc82dcf"],"collectedfrom":[{"key":"opendoar____::3989","value":"Depósito Digital e-UCJC","dataInfo":null}],"pid":[],"dateofcollection":"2020-03-03T13:05:34.806Z","dateoftransformation":"2020-03-03T13:06:54.229Z","extraInfo":[],"oaiprovenance":{"originDescription":{"harvestDate":"2020-03-03T13:05:34.806Z","altered":true,"baseURL":"http%3A%2F%2Frepositorio.ucjc.edu%2Foai%2Frequest","identifier":"oai:repositorio.ucjc.edu:20.500.12020/797","datestamp":"2018-11-14T12:12:23Z","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"author":[{"fullname":"Cuesta Cano, Laura","name":"Laura","surname":"Cuesta 
Cano","rank":1,"pid":null,"affiliation":null}],"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":{"classid":"esl/spa","classname":"Spanish","schemeid":"dnet:languages","schemename":"dnet:languages"},"country":[],"subject":[{"value":"Ciencias de la Comunicación","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Marketing digital","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Medios 
sociales","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Marcas","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Empresas","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Emprendedores","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"II Liga de Debate CICAE - UCJC","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"6114.13 
Marketing","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"title":[{"value":"Laura Cuesta - Liga de Debate CICAE - UCJC","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"relevantdate":[],"description":[{"value":"Laura Cuesta, profesora en la Universidad Camilo José Cela (UCJC) y Especialista en Marketing Digital y Medios Sociales para marcas y emprendedores. \n\nEn esta ocasión, Laura Cuesta, nos explica el significado de redes sociales y nos habla sobre el uso que le dan los usuarios.\n\nII Liga de Debate CICAE - UCJC http://ligacicae.ucjc.edu/\n\nLa Universidad Camilo José Cela (UCJC) y la Asociación de Colegios Privados e Independientes –Círculo de Calidad Educativa (CICAE) – organizan la segunda edición de esta gran liga de debate académico. El formato es al estilo inglés, similar al de la Oxford Union, donde algunos jóvenes se reunían a debatir sobre temas de la más candente actualidad y polemizaban con la intención de formarse en tan valoradas habilidades.\n\nLa Liga de Debate CICAE-UCJC es una ocasión única para los alumnos de ponerse en contacto con herramientas tan útiles como la oratoria y la comunicación efectiva. Pero, sobre todo, una oportunidad de fomentar el pensamiento crítico y la socialización entre alumnos con espíritu emprendedor. 
La Liga de Debate CICAE - Universidad Camilo José Cela se diferencia del resto de torneos en que potenciamos que sea eminentemente formativa. Alumnos y profesores reciben formación en comunicación, oratoria y debate por parte de alguno de los expertos en debate españoles más reconocidos en la escena internacional.\n\nDatos de la III Liga de Debate CICAE - UCJC http://ligacicae.ucjc.edu/","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"dateofacceptance":{"value":"2018-10-23","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"publisher":null,"embargoenddate":null,"source":[],"fulltext":[],"format":[],"contributor":[{"value":"Universidad Camilo José Cela, Vicerrectorado de 
Innovación","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"resourcetype":null,"coverage":[],"bestaccessright":null,"context":[],"externalReference":[],"instance":[{"license":{"value":"http://creativecommons.org/licenses/by-nc-nd/4.0/","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"instancetype":{"classid":"0024","classname":"Film","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"opendoar____::3989","value":"Depósito Digital e-UCJC","dataInfo":null},"url":["http://hdl.handle.net/20.500.12020/797"],"distributionlocation":"","collectedfrom":{"key":"opendoar____::3989","value":"Depósito Digital e-UCJC","dataInfo":null},"dateofacceptance":{"value":"2018-10-23","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null}],"storagedate":null,"device":null,"size":null,"version":null,"lastmetadataupdate":null,"metadataversionnumber":null,"geolocation":[]} 
{"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1585055870582,"id":"50|od______3989::752fd0b2bbac1ea1cc50e52fd46eb663","originalId":["od______3989::752fd0b2bbac1ea1cc50e52fd46eb663"],"collectedfrom":[{"key":"opendoar____::3989","value":"Depósito Digital e-UCJC","dataInfo":null}],"pid":[],"dateofcollection":"2020-03-03T13:05:34.561Z","dateoftransformation":"2020-03-03T13:06:55.716Z","extraInfo":[],"oaiprovenance":{"originDescription":{"harvestDate":"2020-03-03T13:05:34.561Z","altered":true,"baseURL":"http%3A%2F%2Frepositorio.ucjc.edu%2Foai%2Frequest","identifier":"oai:repositorio.ucjc.edu:20.500.12020/798","datestamp":"2018-11-14T12:57:26Z","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"author":[{"fullname":"Universidad Camilo José Cela, Vicerrectorado de Innovación","name":"Vicerrectorado Innovación","surname":"Universidad Camilo José 
Cela","rank":1,"pid":null,"affiliation":null}],"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":{"classid":"esl/spa","classname":"Spanish","schemeid":"dnet:languages","schemename":"dnet:languages"},"country":[],"subject":[{"value":"Psicología","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Alcohol","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Dependencia","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Proyectos de 
investigación","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Alcoholismo","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Realidad virtual","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Psicología conductual","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"title":[{"value":"Investigación - Intervención basada en realidad virtual sobre la dependencia del alcohol","qualifier":{"classid":"main title","classname":"main 
title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"relevantdate":[],"description":[{"value":"Proyecto de investigación que se están desarrollando en la Universidad Camilo José Cela (UCJC) con tecnología aplicada al abandono de la dependencia del alcohol. \nRosa Jurado, profesora e investigadora de UCJC https://www.ucjc.edu/, explica cómo se conjuga el alcoholismo con la realidad virtual. \"Mi investigación consiste en desarrollar una técnica de intervención o rehabilitación que tiene como propósito fomentar la capacidad de inhibición para que las personas que tienen dependencia del alcohol sean capaces de controlar su conducta de aproximación hacia aquello que tiene que ver con el consumo de alcohol\".\n\nEn esta investigación, liderada por la Universidad Camilo José Cela, están participando, el Instituto de Investigación Biomédica del Hospital 12 de Octubre y la Universidad Politécnica de Madrid, con el CeDint. \"En nuestro equipo de investigación pensamos que, además del deseo de consumo, existen una carencia clara de capacidad de control inhibitorio en este tipo de personas\", destaca Rosa Jurado.\n\nLa realidad virtual sirve, según explica Gabriel Rubio, jefe de Psiquiatría del Hospital 12 de Octubre, \"para dar un paso más, para ver cómo reacciona un sujeto en un ambiente que no es el real, pero se parece mucho al real\". El objetivo final de estas acciones es \"mejorar la capacidad del individuo para poner en marcha mecanismos de inhibición conductual, para que el sujeto sea capaz de decir que no\".\n\nY no acaba ahí el proceso. 
La fase final del tratamiento se enfoca en que el paciente \"sea capaz de reconstruir su vida\" a través de programas de entrenamiento de habilidades sociales, asertividad, familias... \"Juntando todas las áreas de intervención, la abstinencia se mantiene mucho más tiempo y las recaídas disminuyen\".","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"dateofacceptance":{"value":"2017-10-05","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"publisher":null,"embargoenddate":null,"source":[],"fulltext":[],"format":[],"contributor":[{"value":"Universidad Camilo José Cela, Vicerrectorado de Innovación","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"resourcetype":null,"coverage":[],"bestaccessright":null,"context":[],"externalReference":[],"instance":[{"license":{"value":"http://creativecommons.org/licenses/by-nc-nd/4.0/","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"accessright":{"classid":"OPEN","classname":"Open 
Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"instancetype":{"classid":"0024","classname":"Film","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"opendoar____::3989","value":"Depósito Digital e-UCJC","dataInfo":null},"url":["http://hdl.handle.net/20.500.12020/798"],"distributionlocation":"","collectedfrom":{"key":"opendoar____::3989","value":"Depósito Digital e-UCJC","dataInfo":null},"dateofacceptance":{"value":"2017-10-05","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null}],"storagedate":null,"device":null,"size":null,"version":null,"lastmetadataupdate":null,"metadataversionnumber":null,"geolocation":[]} {"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1585055870750,"id":"50|od______3989::7fcbe3a03280663cddebfd3cb9203177","originalId":["od______3989::7fcbe3a03280663cddebfd3cb9203177"],"collectedfrom":[{"key":"opendoar____::3989","value":"Depósito Digital 
e-UCJC","dataInfo":null}],"pid":[],"dateofcollection":"2020-03-03T13:05:25.652Z","dateoftransformation":"2020-03-03T13:06:55.95Z","extraInfo":[],"oaiprovenance":{"originDescription":{"harvestDate":"2020-03-03T13:05:25.652Z","altered":true,"baseURL":"http%3A%2F%2Frepositorio.ucjc.edu%2Foai%2Frequest","identifier":"oai:repositorio.ucjc.edu:20.500.12020/796","datestamp":"2018-11-05T13:36:54Z","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"author":[{"fullname":"Universidad Camilo José Cela, Vicerrectorado de Innovación","name":"Vicerrectorado Innovación","surname":"Universidad Camilo José Cela","rank":1,"pid":null,"affiliation":null}],"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":{"classid":"esl/spa","classname":"Spanish","schemeid":"dnet:languages","schemename":"dnet:languages"},"country":[],"subject":[{"value":"Ciencias de la Comunicación","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Redes 
sociales","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Comunicación","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Información","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Desinformación","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"III Liga de Debate 
CICAE","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Instantaneidad","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Información periodística","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Fake news","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"5910.02 Medios de Comunicación de 
Masas","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"title":[{"value":"Jorge Gallardo - Liga de Debate CICAE","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"relevantdate":[],"description":[{"value":"Jorge Gallardo, profesor de Derecho en la Universidad Camilo José Cela (UCJC), Doctor en Comunicación Audiovisual y MBA en Empresas Audiovisuales. Subdirector de Espejo Público en Antena 3 Noticias. \n\nEn esta ocasión, Jorge Gallardo habla sobre las redes sociales, cómo han transformado la manera en la que nos comunicamos y nos informamos. \n\nIII Liga de Debate CICAE - UCJC http://ligacicae.ucjc.edu/\n\nLa Universidad Camilo José Cela (UCJC) y la Asociación de Colegios Privados e Independientes –Círculo de Calidad Educativa (CICAE) – organizan la tercera edición de esta gran liga de debate académico. El formato es al estilo inglés, similar al de la Oxford Union, donde algunos jóvenes se reunían a debatir sobre temas de la más candente actualidad y polemizaban con la intención de formarse en tan valoradas habilidades.\n\nLa Liga de Debate CICAE-UCJC es una ocasión única para los alumnos de ponerse en contacto con herramientas tan útiles como la oratoria y la comunicación efectiva. 
Pero, sobre todo, una oportunidad de fomentar el pensamiento crítico y la socialización entre alumnos con espíritu emprendedor. La Liga de Debate CICAE - Universidad Camilo José Cela se diferencia del resto de torneos en que potenciamos que sea eminentemente formativa. Alumnos y profesores reciben formación en comunicación, oratoria y debate por parte de alguno de los expertos en debate españoles más reconocidos en la escena internacional.\n\nDatos de la III Liga de Debate CICAE - UCJC http://ligacicae.ucjc.edu/","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"dateofacceptance":{"value":"2018-10-23","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"publisher":null,"embargoenddate":null,"source":[],"fulltext":[],"format":[],"contributor":[],"resourcetype":null,"coverage":[],"bestaccessright":null,"context":[],"externalReference":[],"instance":[{"license":{"value":"http://creativecommons.org/licenses/by-nc-nd/4.0/","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"accessright":{"classid":"OPEN","classname":"Open 
Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"instancetype":{"classid":"0024","classname":"Film","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"opendoar____::3989","value":"Depósito Digital e-UCJC","dataInfo":null},"url":["http://hdl.handle.net/20.500.12020/796"],"distributionlocation":"","collectedfrom":{"key":"opendoar____::3989","value":"Depósito Digital e-UCJC","dataInfo":null},"dateofacceptance":{"value":"2018-10-23","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null}],"storagedate":null,"device":null,"size":null,"version":null,"lastmetadataupdate":null,"metadataversionnumber":null,"geolocation":[]} -{"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1585055872001,"id":"50|od______3989::d791339867bec6d3eb2104deeb4e4961","originalId":["od______3989::d791339867bec6d3eb2104deeb4e4961"],"collectedfrom":[{"key":"opendoar____::3989","value":"Depósito Digital 
e-UCJC","dataInfo":null}],"pid":[],"dateofcollection":"2020-03-03T13:05:34.586Z","dateoftransformation":"2020-03-03T13:06:57.721Z","extraInfo":[],"oaiprovenance":{"originDescription":{"harvestDate":"2020-03-03T13:05:34.586Z","altered":true,"baseURL":"http%3A%2F%2Frepositorio.ucjc.edu%2Foai%2Frequest","identifier":"oai:repositorio.ucjc.edu:20.500.12020/645","datestamp":"2018-04-12T07:59:27Z","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"author":[{"fullname":"Universidad Camilo José Cela, Vicerrectorado de Innovación","name":"Vicerrectorado Innovación","surname":"Universidad Camilo José Cela","rank":1,"pid":null,"affiliation":null}],"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":{"classid":"esl/spa","classname":"Spanish","schemeid":"dnet:languages","schemename":"dnet:languages"},"country":[],"subject":[{"value":"Transferencia de Conocimiento e Innovación","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Innovación","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Investigación","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:cro
sswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Ciencia","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Tecnología","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"title":[{"value":"UCJC Open Science Day 2018","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"relevantdate":[],"description":[{"value":"El UCJC Open Science Day tiene como objetivo mostrar las actividades de investigación llevadas a cabo por investigadores de la Universidad Camilo José Cela (UCJC) a todos los miembros de la 
UCJC.","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"dateofacceptance":{"value":"2018-04-10","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"publisher":null,"embargoenddate":null,"source":[],"fulltext":[],"format":[],"contributor":[],"resourcetype":null,"coverage":[],"bestaccessright":null,"context":[],"externalReference":[],"instance":[{"license":{"value":"http://creativecommons.org/licenses/by-nc-nd/4.0/","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"instancetype":{"classid":"0024","classname":"Film","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"opendoar____::3989","value":"Depósito Digital e-UCJC","dataInfo":null},"url":["http://hdl.handle.net/20.500.12020/645"],"distributionlocation":"","collectedfrom":{"key":"opendoar____::3989","value":"Depósito Digital 
e-UCJC","dataInfo":null},"dateofacceptance":{"value":"2018-04-10","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null}],"storagedate":null,"device":null,"size":null,"version":null,"lastmetadataupdate":null,"metadataversionnumber":null,"geolocation":[]} -{"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1585055872022,"id":"50|od______3989::d90d3a1f64ad264b5ebed8a35b280343","originalId":["od______3989::d90d3a1f64ad264b5ebed8a35b280343"],"collectedfrom":[{"key":"opendoar____::3989","value":"Depósito Digital e-UCJC","dataInfo":null}],"pid":[],"dateofcollection":"2020-03-03T13:05:34.912Z","dateoftransformation":"2020-03-03T13:06:57.747Z","extraInfo":[],"oaiprovenance":{"originDescription":{"harvestDate":"2020-03-03T13:05:34.912Z","altered":true,"baseURL":"http%3A%2F%2Frepositorio.ucjc.edu%2Foai%2Frequest","identifier":"oai:repositorio.ucjc.edu:20.500.12020/795","datestamp":"2018-11-06T15:43:10Z","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"author":[{"fullname":"Universidad Camilo José Cela, Vicerrectorado de Innovación","name":"Vicerrectorado Innovación","surname":"Universidad Camilo José 
Cela","rank":1,"pid":null,"affiliation":null}],"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":{"classid":"esl/spa","classname":"Spanish","schemeid":"dnet:languages","schemename":"dnet:languages"},"country":[],"subject":[{"value":"Miriam","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Unión Europea","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Desinformación","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Política","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Periodismo","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible
":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Ciudadanos europeos","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Democracias","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Estado de derecho","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Derechos humanos","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"5910.02 Medios de Comunicación de 
Masas","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"title":[{"value":"Ramón Luis Varcárcel - La Unión Europea ante el reto de la desinformación","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"relevantdate":[],"description":[{"value":"Conferencia de Ramón Luis Valcárcel, vicepresidente del Parlamento Europeo, en el Campus de Almagro de la Universidad Camilo José Cela (UCJC) bajo el título \"La Unión Europea ante el reto de la desinformación\", en la que ha alertado sobre el alcance de la desinformación en las democracias y sobre cómo pone en peligro los derechos de los ciudadanos europeos. 
También ha participado el rector de la UCJC, Samuel Martín-Barbero.","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"dateofacceptance":{"value":"2018-10-11","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"publisher":null,"embargoenddate":null,"source":[],"fulltext":[],"format":[],"contributor":[],"resourcetype":null,"coverage":[],"bestaccessright":null,"context":[],"externalReference":[],"instance":[{"license":{"value":"http://creativecommons.org/licenses/by-nc-nd/4.0/","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"instancetype":{"classid":"0024","classname":"Film","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"opendoar____::3989","value":"Depósito Digital e-UCJC","dataInfo":null},"url":["http://hdl.handle.net/20.500.12020/795"],"distributionlocation":"","collectedfrom":{"key":"opendoar____::3989","value":"Depósito Digital 
e-UCJC","dataInfo":null},"dateofacceptance":{"value":"2018-10-11","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null}],"storagedate":null,"device":null,"size":null,"version":null,"lastmetadataupdate":null,"metadataversionnumber":null,"geolocation":[]} +{"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1585055872001,"id":"50|od______3989::d791339867bec6d3eb2104deeb4e4961","originalId":["od______3989::d791339867bec6d3eb2104deeb4e4961"],"collectedfrom":[{"key":"opendoar____::3989","value":"Depósito Digital e-UCJC","dataInfo":null}],"pid":[],"dateofcollection":"2020-03-03T13:05:34.586Z","dateoftransformation":"2020-03-03T13:06:57.721Z","extraInfo":[],"oaiprovenance":{"originDescription":{"harvestDate":"2020-03-03T13:05:34.586Z","altered":true,"baseURL":"http%3A%2F%2Frepositorio.ucjc.edu%2Foai%2Frequest","identifier":"oai:repositorio.ucjc.edu:20.500.12020/645","datestamp":"2018-04-12T07:59:27Z","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"author":[{"fullname":"Universidad Camilo José Cela, Vicerrectorado de Innovación","name":"Vicerrectorado Innovación","surname":"Universidad Camilo José 
Cela","rank":1,"pid":null,"affiliation":null}],"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":{"classid":"esl/spa","classname":"Spanish","schemeid":"dnet:languages","schemename":"dnet:languages"},"country":[],"subject":[{"value":"North America","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"0501 fake fos","qualifier":{"classid":"subject:fos","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Investigación","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Ciencia","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Tecnología","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataI
nfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"title":[{"value":"UCJC Open Science Day 2018","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"relevantdate":[],"description":[{"value":"El UCJC Open Science Day tiene como objetivo mostrar las actividades de investigación llevadas a cabo por investigadores de la Universidad Camilo José Cela (UCJC) a todos los miembros de la 
UCJC.","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"dateofacceptance":{"value":"2018-04-10","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"publisher":null,"embargoenddate":null,"source":[],"fulltext":[],"format":[],"contributor":[],"resourcetype":null,"coverage":[],"bestaccessright":null,"context":[],"externalReference":[],"instance":[{"license":{"value":"http://creativecommons.org/licenses/by-nc-nd/4.0/","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"instancetype":{"classid":"0024","classname":"Film","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"opendoar____::3989","value":"Depósito Digital e-UCJC","dataInfo":null},"url":["http://hdl.handle.net/20.500.12020/645"],"distributionlocation":"","collectedfrom":{"key":"opendoar____::3989","value":"Depósito Digital 
e-UCJC","dataInfo":null},"dateofacceptance":{"value":"2018-04-10","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null}],"storagedate":null,"device":null,"size":null,"version":null,"lastmetadataupdate":null,"metadataversionnumber":null,"geolocation":[]} +{"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1585055872022,"id":"50|od______3989::d90d3a1f64ad264b5ebed8a35b280343","originalId":["od______3989::d90d3a1f64ad264b5ebed8a35b280343"],"collectedfrom":[{"key":"opendoar____::3989","value":"Depósito Digital e-UCJC","dataInfo":null}],"pid":[],"dateofcollection":"2020-03-03T13:05:34.912Z","dateoftransformation":"2020-03-03T13:06:57.747Z","extraInfo":[],"oaiprovenance":{"originDescription":{"harvestDate":"2020-03-03T13:05:34.912Z","altered":true,"baseURL":"http%3A%2F%2Frepositorio.ucjc.edu%2Foai%2Frequest","identifier":"oai:repositorio.ucjc.edu:20.500.12020/795","datestamp":"2018-11-06T15:43:10Z","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"author":[{"fullname":"Universidad Camilo José Cela, Vicerrectorado de Innovación","name":"Vicerrectorado Innovación","surname":"Universidad Camilo José 
Cela","rank":1,"pid":null,"affiliation":null}],"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":{"classid":"esl/spa","classname":"Spanish","schemeid":"dnet:languages","schemename":"dnet:languages"},"country":[],"subject":[{"value":"Miriam","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Unión Europea","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Desinformación","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Política","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Periodismo","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible
":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Ciudadanos europeos","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Democracias","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Estado de derecho","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Derechos humanos","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"5910.02 Medios de Comunicación de 
Masas","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"title":[{"value":"Ramón Luis Varcárcel - La Unión Europea ante el reto de la desinformación","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"relevantdate":[],"description":[{"value":"Conferencia de Ramón Luis Valcárcel, vicepresidente del Parlamento Europeo, en el Campus de Almagro de la Universidad Camilo José Cela (UCJC) bajo el título \"La Unión Europea ante el reto de la desinformación\", en la que ha alertado sobre el alcance de la desinformación en las democracias y sobre cómo pone en peligro los derechos de los ciudadanos europeos. 
También ha participado el rector de la UCJC, Samuel Martín-Barbero.","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"dateofacceptance":{"value":"2018-10-11","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"publisher":null,"embargoenddate":null,"source":[],"fulltext":[],"format":[],"contributor":[],"resourcetype":null,"coverage":[],"bestaccessright":null,"context":[],"externalReference":[],"instance":[{"license":{"value":"http://creativecommons.org/licenses/by-nc-nd/4.0/","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"instancetype":{"classid":"0024","classname":"Film","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"opendoar____::3989","value":"Depósito Digital e-UCJC","dataInfo":null},"url":["http://hdl.handle.net/20.500.12020/795"],"distributionlocation":"","collectedfrom":{"key":"opendoar____::3989","value":"Depósito Digital 
e-UCJC","dataInfo":null},"dateofacceptance":{"value":"2018-10-11","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null}],"storagedate":null,"device":null,"size":null,"version":null,"lastmetadataupdate":null,"metadataversionnumber":null,"geolocation":[]} \ No newline at end of file From 58c05731f9d3b3d472b828a092c73f6aa78703ff Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Tue, 29 Nov 2022 11:21:51 +0100 Subject: [PATCH 32/55] [graph cleaning] WIP: testing the collectedfron and hostedby patch procedure --- .../oa/graph/clean/CleanContextSparkJob.java | 12 +- .../graph/clean/cfhb/CleanCfHbSparkJob.java | 151 ++++++++---------- .../oa/graph/clean/cfhb/IdCfHbMapping.java | 12 +- .../clean/country/CleanCountrySparkJob.java | 12 +- .../country/GetDatasourceFromCountry.java | 8 +- .../dhp/oa/graph/clean/oozie_app/workflow.xml | 34 ++-- .../oa/graph/input_clean_cfhb_parameters.json | 6 + .../graph/input_clean_context_parameters.json | 4 +- .../graph/input_clean_country_parameters.json | 4 +- .../input_datasource_country_parameters.json | 4 +- .../clean/cfhb/CleanCfHbSparkJobTest.java | 118 ++++++++++++++ .../clean/cfhb/entities/dataset/dataset.json | 3 + .../oa/graph/clean/cfhb/masterduplicate.json | 4 + 13 files changed, 243 insertions(+), 129 deletions(-) create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/cfhb/CleanCfHbSparkJobTest.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/cfhb/entities/dataset/dataset.json create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/cfhb/masterduplicate.json diff --git 
a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleanContextSparkJob.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleanContextSparkJob.java index 55fdbac59..10a3d4465 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleanContextSparkJob.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleanContextSparkJob.java @@ -47,8 +47,8 @@ public class CleanContextSparkJob implements Serializable { String inputPath = parser.get("inputPath"); log.info("inputPath: {}", inputPath); - String workingPath = parser.get("workingPath"); - log.info("workingPath: {}", workingPath); + String workingDir = parser.get("workingDir"); + log.info("workingDir: {}", workingDir); String contextId = parser.get("contextId"); log.info("contextId: {}", contextId); @@ -67,12 +67,12 @@ public class CleanContextSparkJob implements Serializable { isSparkSessionManaged, spark -> { - cleanContext(spark, contextId, verifyParam, inputPath, entityClazz, workingPath); + cleanContext(spark, contextId, verifyParam, inputPath, entityClazz, workingDir); }); } private static void cleanContext(SparkSession spark, String contextId, String verifyParam, - String inputPath, Class entityClazz, String workingPath) { + String inputPath, Class entityClazz, String workingDir) { Dataset res = spark .read() .textFile(inputPath) @@ -106,11 +106,11 @@ public class CleanContextSparkJob implements Serializable { .write() .mode(SaveMode.Overwrite) .option("compression", "gzip") - .json(workingPath); + .json(workingDir); spark .read() - .textFile(workingPath) + .textFile(workingDir) .map( (MapFunction) value -> OBJECT_MAPPER.readValue(value, entityClazz), Encoders.bean(entityClazz)) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/cfhb/CleanCfHbSparkJob.java 
b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/cfhb/CleanCfHbSparkJob.java index d35dbc7c1..b4678cc6c 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/cfhb/CleanCfHbSparkJob.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/cfhb/CleanCfHbSparkJob.java @@ -3,8 +3,11 @@ package eu.dnetlib.dhp.oa.graph.clean.cfhb; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; +import java.util.Iterator; +import java.util.List; import java.util.Objects; import java.util.Optional; +import java.util.stream.Collectors; import java.util.stream.Stream; import org.apache.commons.io.IOUtils; @@ -12,8 +15,10 @@ import org.apache.commons.lang3.StringUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.FlatMapFunction; import org.apache.spark.api.java.function.MapFunction; +import org.apache.spark.api.java.function.MapGroupsFunction; import org.apache.spark.sql.*; import org.apache.spark.sql.expressions.Aggregator; +import org.jetbrains.annotations.NotNull; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -52,14 +57,14 @@ public class CleanCfHbSparkJob { String inputPath = parser.get("inputPath"); log.info("inputPath: {}", inputPath); - String workingPath = parser.get("workingPath"); - log.info("workingPath: {}", workingPath); + String resolvedPath = parser.get("resolvedPath"); + log.info("resolvedPath: {}", resolvedPath); String outputPath = parser.get("outputPath"); log.info("outputPath: {}", outputPath); - String masterDuplicatePath = parser.get("masterDuplicatePath"); - log.info("masterDuplicatePath: {}", masterDuplicatePath); + String dsMasterDuplicatePath = parser.get("datasourceMasterDuplicate"); + log.info("datasourceMasterDuplicate: {}", dsMasterDuplicatePath); String graphTableClassName = parser.get("graphTableClassName"); log.info("graphTableClassName: {}", graphTableClassName); @@ -72,12 +77,12 @@ public 
class CleanCfHbSparkJob { isSparkSessionManaged, spark -> { cleanCfHb( - spark, inputPath, entityClazz, workingPath, masterDuplicatePath, outputPath); + spark, inputPath, entityClazz, resolvedPath, dsMasterDuplicatePath, outputPath); }); } private static void cleanCfHb(SparkSession spark, String inputPath, Class entityClazz, - String workingPath, String masterDuplicatePath, String outputPath) { + String resolvedPath, String masterDuplicatePath, String outputPath) { // read the master-duplicate tuples Dataset md = spark @@ -85,116 +90,94 @@ public class CleanCfHbSparkJob { .textFile(masterDuplicatePath) .map(as(MasterDuplicate.class), Encoders.bean(MasterDuplicate.class)); - // read the result table - Dataset res = spark - .read() - .textFile(inputPath) - .map(as(entityClazz), Encoders.bean(entityClazz)); - // prepare the resolved CF|HB references with the corresponding EMPTY master ID - Dataset resolved = res - .flatMap( - (FlatMapFunction) r -> Stream - .concat( - r.getCollectedfrom().stream().map(KeyValue::getKey), - Stream + Dataset resolved = spark + .read() + .textFile(inputPath) + .map(as(entityClazz), Encoders.bean(entityClazz)) + .flatMap( + (FlatMapFunction) r -> { + final List list = Stream .concat( - r.getInstance().stream().map(Instance::getHostedby).map(KeyValue::getKey), - r.getInstance().stream().map(Instance::getCollectedfrom).map(KeyValue::getKey))) - .distinct() - .map(s -> asIdCfHbMapping(r.getId(), s)) - .iterator(), + r.getCollectedfrom().stream().map(KeyValue::getKey), + Stream + .concat( + r.getInstance().stream().map(Instance::getHostedby).map(KeyValue::getKey), + r.getInstance().stream().map(Instance::getCollectedfrom).map(KeyValue::getKey))) + .distinct() + .map(s -> asIdCfHbMapping(r.getId(), s)) + .collect(Collectors.toList()); + return list.iterator(); + }, Encoders.bean(IdCfHbMapping.class)); - final String resolvedPath = workingPath + "/cfHbResolved"; - - // set the EMPTY master ID and save it aside + // set the EMPTY master ID/NAME 
and save it resolved - .joinWith(md, resolved.col("cfhb").equalTo(md.col("duplicate"))) + .joinWith(md, resolved.col("cfhb").equalTo(md.col("duplicateId"))) .map((MapFunction, IdCfHbMapping>) t -> { t._1().setMasterId(t._2().getMasterId()); + t._1().setMasterName(t._2().getMasterName()); return t._1(); }, Encoders.bean(IdCfHbMapping.class)) .write() .mode(SaveMode.Overwrite) - .parquet(resolvedPath); + .json(resolvedPath); // read again the resolved CF|HB mapping Dataset resolvedDS = spark .read() - .load(resolvedPath) - .as(Encoders.bean(IdCfHbMapping.class)); + .textFile(resolvedPath) + .map(as(IdCfHbMapping.class), Encoders.bean(IdCfHbMapping.class)); + + // read the result table + Dataset res = spark + .read() + .textFile(inputPath) + .map(as(entityClazz), Encoders.bean(entityClazz)); // Join the results with the resolved CF|HB mapping, apply the mapping and save it res - .joinWith(resolvedDS, res.col("id").equalTo(resolved.col("resultId")), "left") + .joinWith(resolvedDS, res.col("id").equalTo(resolvedDS.col("resultId")), "left") .groupByKey((MapFunction, String>) t -> t._1().getId(), Encoders.STRING()) - .agg(new IdCfHbMappingAggregator(entityClazz).toColumn()) + .mapGroups(getMapGroupsFunction(), Encoders.bean(entityClazz)) + //.agg(new IdCfHbMappingAggregator(entityClazz).toColumn()) .write() .mode(SaveMode.Overwrite) .option("compression", "gzip") .json(outputPath); } - public static class IdCfHbMappingAggregator extends Aggregator { + @NotNull + private static MapGroupsFunction, T> getMapGroupsFunction() { + return new MapGroupsFunction, T>() { + @Override + public T call(String key, Iterator> values) throws Exception { + final Tuple2 first = values.next(); + final T res = first._1(); - private final Class entityClazz; - - public IdCfHbMappingAggregator(Class entityClazz) { - this.entityClazz = entityClazz; - } - - @Override - public T zero() { - try { - return entityClazz.newInstance(); - } catch (InstantiationException | IllegalAccessException e) { - 
throw new RuntimeException(e); + updateResult(res, first._2()); + values.forEachRemaining(t -> updateResult(res, t._2())); + return res; } - } - @Override - public T reduce(T r, IdCfHbMapping a) { - if (Objects.isNull(a) && StringUtils.isBlank(a.getMasterId())) { - return r; + private void updateResult(T res, IdCfHbMapping m) { + if (Objects.nonNull(m)) { + res.getCollectedfrom().forEach(kv -> updateKeyValue(kv, m)); + res.getInstance().forEach(i -> { + updateKeyValue(i.getHostedby(), m); + updateKeyValue(i.getCollectedfrom(), m); + }); + } } - r.getCollectedfrom().forEach(kv -> updateKeyValue(kv, a)); - r.getInstance().forEach(i -> { - updateKeyValue(i.getHostedby(), a); - updateKeyValue(i.getCollectedfrom(), a); - }); - return r; - } - @Override - public T merge(T b1, T b2) { - if (Objects.isNull(b1.getId())) { - return b2; + private void updateKeyValue(final KeyValue kv, final IdCfHbMapping a) { + if (kv.getKey().equals(a.getCfhb())) { + kv.setKey(a.getMasterId()); + kv.setValue(a.getMasterName()); + } } - return b1; - } - @Override - public T finish(T r) { - return r; - } - - private void updateKeyValue(final KeyValue kv, final IdCfHbMapping a) { - if (kv.getKey().equals(a.getCfhb())) { - kv.setKey(a.getMasterId()); - kv.setValue(a.getMasterName()); - } - } - - @Override - public Encoder bufferEncoder() { - return Encoders.bean(entityClazz); - } - - @Override - public Encoder outputEncoder() { - return Encoders.bean(entityClazz); - } + }; } private static IdCfHbMapping asIdCfHbMapping(String resultId, String cfHb) { diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/cfhb/IdCfHbMapping.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/cfhb/IdCfHbMapping.java index cb4e1b5e6..fad1129c5 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/cfhb/IdCfHbMapping.java +++ 
b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/cfhb/IdCfHbMapping.java @@ -5,7 +5,7 @@ import java.io.Serializable; public class IdCfHbMapping implements Serializable { - private String resultid; + private String resultId; private String cfhb; @@ -17,15 +17,15 @@ public class IdCfHbMapping implements Serializable { } public IdCfHbMapping(String id) { - this.resultid = id; + this.resultId = id; } - public String getResultid() { - return resultid; + public String getResultId() { + return resultId; } - public void setResultid(String resultid) { - this.resultid = resultid; + public void setResultId(String resultId) { + this.resultId = resultId; } public String getCfhb() { diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/country/CleanCountrySparkJob.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/country/CleanCountrySparkJob.java index 45590f789..d8d803458 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/country/CleanCountrySparkJob.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/country/CleanCountrySparkJob.java @@ -58,8 +58,8 @@ public class CleanCountrySparkJob implements Serializable { String inputPath = parser.get("inputPath"); log.info("inputPath: {}", inputPath); - String workingPath = parser.get("workingPath"); - log.info("workingPath: {}", workingPath); + String workingDir = parser.get("workingDir"); + log.info("workingDir: {}", workingDir); String datasourcePath = parser.get("hostedBy"); log.info("datasourcePath: {}", datasourcePath); @@ -85,12 +85,12 @@ public class CleanCountrySparkJob implements Serializable { spark -> { cleanCountry( - spark, country, verifyParam, inputPath, entityClazz, workingPath, collectedfrom, datasourcePath); + spark, country, verifyParam, inputPath, entityClazz, workingDir, collectedfrom, datasourcePath); }); } private static void cleanCountry(SparkSession 
spark, String country, String[] verifyParam, - String inputPath, Class entityClazz, String workingPath, String collectedfrom, String datasourcePath) { + String inputPath, Class entityClazz, String workingDir, String collectedfrom, String datasourcePath) { List hostedBy = spark .read() @@ -134,11 +134,11 @@ public class CleanCountrySparkJob implements Serializable { .write() .mode(SaveMode.Overwrite) .option("compression", "gzip") - .json(workingPath); + .json(workingDir); spark .read() - .textFile(workingPath) + .textFile(workingDir) .map( (MapFunction) value -> OBJECT_MAPPER.readValue(value, entityClazz), Encoders.bean(entityClazz)) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/country/GetDatasourceFromCountry.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/country/GetDatasourceFromCountry.java index d3741d3e8..598fccdd7 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/country/GetDatasourceFromCountry.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/country/GetDatasourceFromCountry.java @@ -54,8 +54,8 @@ public class GetDatasourceFromCountry implements Serializable { String inputPath = parser.get("inputPath"); log.info("inputPath: {}", inputPath); - String workingPath = parser.get("workingPath"); - log.info("workingPath: {}", workingPath); + String workingPath = parser.get("workingDir"); + log.info("workingDir: {}", workingPath); String country = parser.get("country"); log.info("country: {}", country); @@ -70,7 +70,7 @@ public class GetDatasourceFromCountry implements Serializable { } private static void getDatasourceFromCountry(SparkSession spark, String country, String inputPath, - String workingPath) { + String workingDir) { Dataset organization = spark .read() @@ -100,7 +100,7 @@ public class GetDatasourceFromCountry implements Serializable { .write() .mode(SaveMode.Overwrite) .option("compression", "gzip") - 
.json(workingPath); + .json(workingDir); } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/clean/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/clean/oozie_app/workflow.xml index e717fac0f..e756840bd 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/clean/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/clean/oozie_app/workflow.xml @@ -343,7 +343,7 @@ --inputPath${graphOutputPath}/publication --graphTableClassNameeu.dnetlib.dhp.schema.oaf.Publication - --workingPath${workingDir}/working/publication + --workingDir${workingDir}/working/publication --contextId${contextId} --verifyParam${verifyParam} @@ -370,7 +370,7 @@ --inputPath${graphOutputPath}/dataset --graphTableClassNameeu.dnetlib.dhp.schema.oaf.Dataset - --workingPath${workingDir}/working/dataset + --workingDir${workingDir}/working/dataset --contextId${contextId} --verifyParam${verifyParam} @@ -397,7 +397,7 @@ --inputPath${graphOutputPath}/otherresearchproduct --graphTableClassNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct - --workingPath${workingDir}/working/otherresearchproduct + --workingDir${workingDir}/working/otherresearchproduct --contextId${contextId} --verifyParam${verifyParam} @@ -424,7 +424,7 @@ --inputPath${graphOutputPath}/software --graphTableClassNameeu.dnetlib.dhp.schema.oaf.Software - --workingPath${workingDir}/working/software + --workingDir${workingDir}/working/software --contextId${contextId} --verifyParam${verifyParam} @@ -452,7 +452,7 @@ --conf spark.sql.shuffle.partitions=7680 --inputPath${graphOutputPath} - --workingPath${workingDir}/working/hostedby + --workingDir${workingDir}/working/hostedby --country${country} @@ -485,7 +485,7 @@ --inputPath${graphOutputPath}/publication --graphTableClassNameeu.dnetlib.dhp.schema.oaf.Publication - --workingPath${workingDir}/working/publication + 
--workingDir${workingDir}/working/publication --country${country} --verifyParam${verifyCountryParam} --hostedBy${workingDir}/working/hostedby @@ -514,7 +514,7 @@ --inputPath${graphOutputPath}/dataset --graphTableClassNameeu.dnetlib.dhp.schema.oaf.Dataset - --workingPath${workingDir}/working/dataset + --workingDir${workingDir}/working/dataset --country${country} --verifyParam${verifyCountryParam} --hostedBy${workingDir}/working/hostedby @@ -543,7 +543,7 @@ --inputPath${graphOutputPath}/otherresearchproduct --graphTableClassNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct - --workingPath${workingDir}/working/otherresearchproduct + --workingDir${workingDir}/working/otherresearchproduct --country${country} --verifyParam${verifyCountryParam} --hostedBy${workingDir}/working/hostedby @@ -572,7 +572,7 @@ --inputPath${graphOutputPath}/software --graphTableClassNameeu.dnetlib.dhp.schema.oaf.Software - --workingPath${workingDir}/working/software + --workingDir${workingDir}/working/software --country${country} --verifyParam${verifyCountryParam} --hostedBy${workingDir}/working/hostedby @@ -629,9 +629,9 @@ --conf spark.sql.shuffle.partitions=7680 --inputPath${graphOutputPath}/publication + --resolvedPath${workingDir}/cfHbResolved/publication --outputPath${workingPath}/cfHbPatched/publication --graphTableClassNameeu.dnetlib.dhp.schema.oaf.Publication - --workingDir${workingDir}/working/publication --masterDuplicatePath${workingDir}/masterduplicate @@ -656,9 +656,9 @@ --conf spark.sql.shuffle.partitions=7680 --inputPath${graphOutputPath}/dataset + --resolvedPath${workingDir}/cfHbResolved/dataset --outputPath${workingPath}/cfHbPatched/dataset --graphTableClassNameeu.dnetlib.dhp.schema.oaf.Dataset - --workingDir${workingDir}/working/dataset --masterDuplicatePath${workingDir}/masterduplicate @@ -683,9 +683,9 @@ --conf spark.sql.shuffle.partitions=7680 --inputPath${graphOutputPath}/otherresearchproduct + --resolvedPath${workingDir}/cfHbResolved/otherresearchproduct 
--outputPath${workingPath}/cfHbPatched/otherresearchproduct --graphTableClassNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct - --workingDir${workingDir}/working/otherresearchproduct --masterDuplicatePath${workingDir}/masterduplicate @@ -710,9 +710,9 @@ --conf spark.sql.shuffle.partitions=7680 --inputPath${graphOutputPath}/software + --resolvedPath${workingDir}/cfHbResolved/software --outputPath${workingPath}/cfHbPatched/software --graphTableClassNameeu.dnetlib.dhp.schema.oaf.Software - --workingDir${workingDir}/working/software --masterDuplicatePath${workingDir}/masterduplicate @@ -733,7 +733,7 @@ - ${workingPath}/cfHbPatched/publication + ${workingDir}/cfHbPatched/publication ${graphOutputPath}/publication @@ -745,7 +745,7 @@ - ${workingPath}/cfHbPatched/dataset + ${workingDir}/cfHbPatched/dataset ${graphOutputPath}/dataset @@ -757,7 +757,7 @@ - ${workingPath}/cfHbPatched/otherresearchproduct + ${workingDir}/cfHbPatched/otherresearchproduct ${graphOutputPath}/otherresearchproduct @@ -769,7 +769,7 @@ - ${workingPath}/cfHbPatched/software + ${workingDir}/cfHbPatched/software ${graphOutputPath}/software diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/input_clean_cfhb_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/input_clean_cfhb_parameters.json index 8b8a5f70e..934d173b5 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/input_clean_cfhb_parameters.json +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/input_clean_cfhb_parameters.json @@ -11,6 +11,12 @@ "paramDescription": "the path to the graph data dump to read", "paramRequired": true }, + { + "paramName": "rp", + "paramLongName": "resolvedPath", + "paramDescription": "the path to store the resolved records", + "paramRequired": true + }, { "paramName": "out", "paramLongName": "outputPath", diff --git 
a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/input_clean_context_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/input_clean_context_parameters.json index e3d31d69f..8be6496d8 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/input_clean_context_parameters.json +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/input_clean_context_parameters.json @@ -12,8 +12,8 @@ "paramRequired": true }, { - "paramName": "wp", - "paramLongName": "workingPath", + "paramName": "wd", + "paramLongName": "workingDir", "paramDescription": "the path to store the output graph", "paramRequired": true }, diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/input_clean_country_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/input_clean_country_parameters.json index 318fb22f8..b38b5ac9f 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/input_clean_country_parameters.json +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/input_clean_country_parameters.json @@ -12,8 +12,8 @@ "paramRequired": true }, { - "paramName": "wp", - "paramLongName": "workingPath", + "paramName": "wd", + "paramLongName": "workingDir", "paramDescription": "the path to store the output graph", "paramRequired": true }, diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/input_datasource_country_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/input_datasource_country_parameters.json index e0aa60328..01aa2e7b0 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/input_datasource_country_parameters.json +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/input_datasource_country_parameters.json @@ -12,8 
+12,8 @@ "paramRequired": true }, { - "paramName": "wp", - "paramLongName": "workingPath", + "paramName": "wd", + "paramLongName": "workingDir", "paramDescription": "the path to store the output graph", "paramRequired": true }, diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/cfhb/CleanCfHbSparkJobTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/cfhb/CleanCfHbSparkJobTest.java new file mode 100644 index 000000000..680d1ff64 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/cfhb/CleanCfHbSparkJobTest.java @@ -0,0 +1,118 @@ +package eu.dnetlib.dhp.oa.graph.clean.cfhb; + +import com.fasterxml.jackson.databind.ObjectMapper; +import eu.dnetlib.dhp.schema.oaf.Dataset; +import eu.dnetlib.dhp.schema.oaf.Publication; +import org.apache.commons.io.FileUtils; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.api.java.function.MapFunction; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.SparkSession; +import org.junit.jupiter.api.*; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.File; +import java.io.IOException; +import java.net.URISyntaxException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; + +public class CleanCfHbSparkJobTest { + + private static final Logger log = LoggerFactory.getLogger(CleanCfHbSparkJobTest.class); + + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + private static SparkSession spark; + + private static Path testBaseTmpPath; + + private static String resolvedPath; + + private static String graphInputPath; + + private static String graphOutputPath; + + private static String dsMasterDuplicatePath; + + @BeforeAll + public static void beforeAll() throws IOException, URISyntaxException { + + testBaseTmpPath = 
Files.createTempDirectory(CleanCfHbSparkJobTest.class.getSimpleName()); + log.info("using test base path {}", testBaseTmpPath); + + final File entitiesSources = Paths + .get(CleanCfHbSparkJobTest.class.getResource("/eu/dnetlib/dhp/oa/graph/clean/cfhb/entities").toURI()) + .toFile(); + + FileUtils + .copyDirectory( + entitiesSources, + testBaseTmpPath.resolve("input").resolve("entities").toFile()); + + FileUtils + .copyFileToDirectory( + Paths + .get(CleanCfHbSparkJobTest.class.getResource("/eu/dnetlib/dhp/oa/graph/clean/cfhb/masterduplicate.json").toURI()) + .toFile(), + testBaseTmpPath.resolve("workingDir").resolve("masterduplicate").toFile()); + + + graphInputPath = testBaseTmpPath.resolve("input").resolve("entities").toString(); + resolvedPath = testBaseTmpPath.resolve("workingDir").resolve("cfHbResolved").toString(); + graphOutputPath = testBaseTmpPath.resolve("workingDir").resolve("cfHbPatched").toString(); + dsMasterDuplicatePath = testBaseTmpPath.resolve("workingDir").resolve("masterduplicate").toString(); + + SparkConf conf = new SparkConf(); + conf.setAppName(CleanCfHbSparkJobTest.class.getSimpleName()); + + conf.setMaster("local[*]"); + conf.set("spark.driver.host", "localhost"); + conf.set("spark.ui.enabled", "false"); + + spark = SparkSession + .builder() + .appName(CleanCfHbSparkJobTest.class.getSimpleName()) + .config(conf) + .getOrCreate(); + } + + @AfterAll + public static void afterAll() throws IOException { + FileUtils.deleteDirectory(testBaseTmpPath.toFile()); + spark.stop(); + } + + @Test + void testCleanCfHbSparkJob() throws Exception { + final String outputPath = graphOutputPath + "/dataset"; + CleanCfHbSparkJob + .main( + new String[] { + "--isSparkSessionManaged", Boolean.FALSE.toString(), + "--inputPath", graphInputPath + "/dataset", + "--outputPath", outputPath, + "--resolvedPath", resolvedPath + "/dataset", + "--graphTableClassName", Dataset.class.getCanonicalName(), + "--datasourceMasterDuplicate", dsMasterDuplicatePath + }); + + //final 
JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + Assertions.assertTrue(Files.exists(Paths.get(graphOutputPath, "dataset"))); + + final org.apache.spark.sql.Dataset d = spark + .read() + .textFile(outputPath) + .map(as(Dataset.class), Encoders.bean(Dataset.class)); + Assertions.assertEquals(3, d.count()); + + } + + private static MapFunction as(Class clazz) { + return s -> OBJECT_MAPPER.readValue(s, clazz); + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/cfhb/entities/dataset/dataset.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/cfhb/entities/dataset/dataset.json new file mode 100644 index 000000000..bf2f2d963 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/cfhb/entities/dataset/dataset.json @@ -0,0 +1,3 @@ +{"author":[{"affiliation":[],"fullname":"Greenough, B","name":"B","pid":[],"rank":1,"surname":"Greenough"}],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|re3data_____::4c4416659cb74c2e0e891a883a047cbc","value":"Bacterial Protein Interaction Database - 
DUP"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofcollection":"2021-09-25T10:55:00.639Z","dateoftransformation":"2021-09-25T11:00:04.201Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Heritage Education"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"geolocation":[],"id":"50|doi_________::09821844208a5cd6300b2bfb13bca1b9","instance":[{"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"urn:nbn:nl:ui:13-59-cjhf"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object 
Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.17632/96bpgw5j9d.1"}],"collectedfrom":{"key":"10|re3data_____::4c4416659cb74c2e0e891a883a047cbc","value":"Bacterial Protein Interaction Database - DUP"},"hostedby":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0021","classname":"Dataset","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"alternateIdentifier":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["","http://dx.doi.org/10.17632/96bpgw5j9d.1"]}],"language":{"classid":"und","classname":"Undetermined","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635434801681,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2021-08-16T15:29:45Z","harvestDate":"2021-09-25T10:55:00.639Z","identifier":"oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:211323","metadataNamespace":""}},"originalId":["50|DansKnawCris::09821844208a5cd6300b2bfb13bca1b9","oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:211323"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"0021","classname":"0021","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classificatio
n_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Interdisciplinary sciences"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Interdisciplinary sciences"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Heritage Education"}]} +{"author":[{"affiliation":[],"fullname":"Keijers, D.M.G.","name":"D.M.G.","pid":[],"rank":1,"surname":"Keijers"}],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|opendoar____::788b4ac1e172d8e520c2b9461c0a3d35","value":"FILUR DATA - 
DUP"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofcollection":"2021-09-25T10:41:59.767Z","dateoftransformation":"2021-09-25T11:00:19.238Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"onderzoeksrapport"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"geolocation":[],"id":"50|DansKnawCris::0dd644304b7116e8e58da3a5e3adc37a","instance":[{"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"urn:nbn:nl:ui:13-das-fkq"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object 
Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.17026/dans-xsw-qtnx"}],"collectedfrom":{"key":"10|opendoar____::788b4ac1e172d8e520c2b9461c0a3d35","value":"FILUR DATA - DUP"},"hostedby":{"key":"10|re3data_____::6ffd7bc058f762912dc494cd9c175341","value":"depositar - DUP"},"instancetype":{"classid":"0021","classname":"Dataset","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"alternateIdentifier":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["","http://dx.doi.org/10.17026/dans-xsw-qtnx"]}],"language":{"classid":"dut/nld","classname":"Dutch; Flemish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635434847381,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2021-08-16T13:53:29Z","harvestDate":"2021-09-25T10:41:59.767Z","identifier":"oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:20759","metadataNamespace":""}},"originalId":["oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:20759","50|DansKnawCris::0dd644304b7116e8e58da3a5e3adc37a"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"0021","classname":"0021","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classifica
tion_typologies"},"value":"PROSPECTIE"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Archaeology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Plangebied Lange Ekker te Vessem, gemeente Eersel"}]} +{"author":[],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked 
Services)"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofcollection":"2021-09-25T10:43:13.768Z","dateoftransformation":"2021-09-25T11:01:22.863Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"This find is registered at Portable Antiquities of the Netherlands with number PAN-00054604"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"geolocation":[],"id":"50|DansKnawCris::203a27996ddc0fd1948258e5b7dec61c","instance":[{"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"urn:nbn:nl:ui:13-a7-hwgy"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object 
Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.17026/dans-x3z-fsq5"}],"collectedfrom":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"hostedby":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0021","classname":"Dataset","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"alternateIdentifier":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["","http://dx.doi.org/10.17026/dans-x3z-fsq5"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635434508886,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2021-08-16T14:01:37Z","harvestDate":"2021-09-25T10:43:13.768Z","identifier":"oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:129566","metadataNamespace":""}},"originalId":["oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:129566","50|DansKnawCris::203a27996ddc0fd1948258e5b7dec61c"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"0021","classname":"0021","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_t
ypologies","schemename":"dnet:subject_classification_typologies"},"value":"early medieval enamelled disc brooch variant A9"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Broader Match: disc brooches"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Broader Match: schijffibula - 
geemailleerd"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"metal"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"copper alloy"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Temporal coverage: Early Middle Ages C"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Temporal coverage: Early Middle Ages 
D"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Temporal coverage: 800 until 1000"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Archaeology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"PAN-00054604 - early medieval enamelled disc brooch variant A9"}]} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/cfhb/masterduplicate.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/cfhb/masterduplicate.json new file mode 100644 index 000000000..b63cfe6b3 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/cfhb/masterduplicate.json @@ -0,0 +1,4 @@ +{ "duplicateId" : "10|re3data_____::4c4416659cb74c2e0e891a883a047cbc", "masterId" : 
"10|fairsharing_::a29d1598024f9e87beab4b98411d48ce", "masterName" : "Bacterial Protein Interaction Database" } +{ "duplicateId" : "10|opendoar____::788b4ac1e172d8e520c2b9461c0a3d35", "masterId" : "10|re3data_____::fc1db64b3964826913b1e9eafe830490", "masterName" : "FULIR Data" } +{ "duplicateId" : "10|re3data_____::6ffd7bc058f762912dc494cd9c175341", "masterId" : "10|fairsharing_::3f647cadf56541fb9513cb63ec370187", "masterName" : "depositar" } +{ "duplicateId" : "10|scindeksserb::07022f78a8cc6d1171092454ecdbb47c", "masterId" : "10|doajarticles::07022f78a8cc6d1171092454ecdbb47c", "masterName" : "Artefact" } \ No newline at end of file From 8e3edba318b7573f24bcf9300a2c115bf2074e34 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Tue, 29 Nov 2022 16:07:09 +0100 Subject: [PATCH 33/55] [graph cleaning] testing the collectedfron and hostedby patch procedure --- .../graph/clean/cfhb/CleanCfHbSparkJob.java | 63 +++-- .../clean/cfhb/CleanCfHbSparkJobTest.java | 253 ++++++++++++------ 2 files changed, 208 insertions(+), 108 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/cfhb/CleanCfHbSparkJob.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/cfhb/CleanCfHbSparkJob.java index b4678cc6c..122e27dec 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/cfhb/CleanCfHbSparkJob.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/cfhb/CleanCfHbSparkJob.java @@ -25,11 +25,13 @@ import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.common.HdfsSupport; import eu.dnetlib.dhp.common.action.model.MasterDuplicate; import eu.dnetlib.dhp.oa.graph.clean.country.CleanCountrySparkJob; import eu.dnetlib.dhp.schema.oaf.Instance; import eu.dnetlib.dhp.schema.oaf.KeyValue; import eu.dnetlib.dhp.schema.oaf.Result; +import 
eu.dnetlib.dhp.utils.DHPUtils; import scala.Tuple2; public class CleanCfHbSparkJob { @@ -76,6 +78,8 @@ public class CleanCfHbSparkJob { conf, isSparkSessionManaged, spark -> { + HdfsSupport.remove(outputPath, spark.sparkContext().hadoopConfiguration()); + HdfsSupport.remove(resolvedPath, spark.sparkContext().hadoopConfiguration()); cleanCfHb( spark, inputPath, entityClazz, resolvedPath, dsMasterDuplicatePath, outputPath); }); @@ -92,33 +96,15 @@ public class CleanCfHbSparkJob { // prepare the resolved CF|HB references with the corresponding EMPTY master ID Dataset resolved = spark - .read() - .textFile(inputPath) - .map(as(entityClazz), Encoders.bean(entityClazz)) - .flatMap( - (FlatMapFunction) r -> { - final List list = Stream - .concat( - r.getCollectedfrom().stream().map(KeyValue::getKey), - Stream - .concat( - r.getInstance().stream().map(Instance::getHostedby).map(KeyValue::getKey), - r.getInstance().stream().map(Instance::getCollectedfrom).map(KeyValue::getKey))) - .distinct() - .map(s -> asIdCfHbMapping(r.getId(), s)) - .collect(Collectors.toList()); - return list.iterator(); - }, - Encoders.bean(IdCfHbMapping.class)); + .read() + .textFile(inputPath) + .map(as(entityClazz), Encoders.bean(entityClazz)) + .flatMap(flattenCfHbFn(), Encoders.bean(IdCfHbMapping.class)); // set the EMPTY master ID/NAME and save it resolved .joinWith(md, resolved.col("cfhb").equalTo(md.col("duplicateId"))) - .map((MapFunction, IdCfHbMapping>) t -> { - t._1().setMasterId(t._2().getMasterId()); - t._1().setMasterName(t._2().getMasterName()); - return t._1(); - }, Encoders.bean(IdCfHbMapping.class)) + .map(asIdCfHbMapping(), Encoders.bean(IdCfHbMapping.class)) .write() .mode(SaveMode.Overwrite) .json(resolvedPath); @@ -131,27 +117,46 @@ public class CleanCfHbSparkJob { // read the result table Dataset res = spark - .read() - .textFile(inputPath) - .map(as(entityClazz), Encoders.bean(entityClazz)); + .read() + .textFile(inputPath) + .map(as(entityClazz), Encoders.bean(entityClazz)); 
// Join the results with the resolved CF|HB mapping, apply the mapping and save it res .joinWith(resolvedDS, res.col("id").equalTo(resolvedDS.col("resultId")), "left") .groupByKey((MapFunction, String>) t -> t._1().getId(), Encoders.STRING()) .mapGroups(getMapGroupsFunction(), Encoders.bean(entityClazz)) - //.agg(new IdCfHbMappingAggregator(entityClazz).toColumn()) .write() .mode(SaveMode.Overwrite) .option("compression", "gzip") .json(outputPath); } - @NotNull + private static MapFunction, IdCfHbMapping> asIdCfHbMapping() { + return t -> { + t._1().setMasterId(t._2().getMasterId()); + t._1().setMasterName(t._2().getMasterName()); + return t._1(); + }; + } + + private static FlatMapFunction flattenCfHbFn() { + return r -> Stream + .concat( + r.getCollectedfrom().stream().map(KeyValue::getKey), + Stream + .concat( + r.getInstance().stream().map(Instance::getHostedby).map(KeyValue::getKey), + r.getInstance().stream().map(Instance::getCollectedfrom).map(KeyValue::getKey))) + .distinct() + .map(s -> asIdCfHbMapping(r.getId(), s)) + .iterator(); + } + private static MapGroupsFunction, T> getMapGroupsFunction() { return new MapGroupsFunction, T>() { @Override - public T call(String key, Iterator> values) throws Exception { + public T call(String key, Iterator> values) { final Tuple2 first = values.next(); final T res = first._1(); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/cfhb/CleanCfHbSparkJobTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/cfhb/CleanCfHbSparkJobTest.java index 680d1ff64..b0097ed6f 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/cfhb/CleanCfHbSparkJobTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/cfhb/CleanCfHbSparkJobTest.java @@ -1,8 +1,16 @@ + package eu.dnetlib.dhp.oa.graph.clean.cfhb; -import com.fasterxml.jackson.databind.ObjectMapper; -import eu.dnetlib.dhp.schema.oaf.Dataset; 
-import eu.dnetlib.dhp.schema.oaf.Publication; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.io.File; +import java.io.IOException; +import java.net.URISyntaxException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; + import org.apache.commons.io.FileUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaSparkContext; @@ -13,106 +21,193 @@ import org.junit.jupiter.api.*; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.File; -import java.io.IOException; -import java.net.URISyntaxException; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.Paths; +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.schema.oaf.Dataset; +import eu.dnetlib.dhp.schema.oaf.Publication; public class CleanCfHbSparkJobTest { - private static final Logger log = LoggerFactory.getLogger(CleanCfHbSparkJobTest.class); + private static final Logger log = LoggerFactory.getLogger(CleanCfHbSparkJobTest.class); - private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - private static SparkSession spark; + private static SparkSession spark; - private static Path testBaseTmpPath; + private static Path testBaseTmpPath; - private static String resolvedPath; + private static String resolvedPath; - private static String graphInputPath; + private static String graphInputPath; - private static String graphOutputPath; + private static String graphOutputPath; - private static String dsMasterDuplicatePath; + private static String dsMasterDuplicatePath; - @BeforeAll - public static void beforeAll() throws IOException, URISyntaxException { + @BeforeAll + public static void beforeAll() throws IOException, URISyntaxException { - testBaseTmpPath = 
Files.createTempDirectory(CleanCfHbSparkJobTest.class.getSimpleName()); - log.info("using test base path {}", testBaseTmpPath); + testBaseTmpPath = Files.createTempDirectory(CleanCfHbSparkJobTest.class.getSimpleName()); + log.info("using test base path {}", testBaseTmpPath); - final File entitiesSources = Paths - .get(CleanCfHbSparkJobTest.class.getResource("/eu/dnetlib/dhp/oa/graph/clean/cfhb/entities").toURI()) - .toFile(); + final File entitiesSources = Paths + .get(CleanCfHbSparkJobTest.class.getResource("/eu/dnetlib/dhp/oa/graph/clean/cfhb/entities").toURI()) + .toFile(); - FileUtils - .copyDirectory( - entitiesSources, - testBaseTmpPath.resolve("input").resolve("entities").toFile()); + FileUtils + .copyDirectory( + entitiesSources, + testBaseTmpPath.resolve("input").resolve("entities").toFile()); - FileUtils - .copyFileToDirectory( - Paths - .get(CleanCfHbSparkJobTest.class.getResource("/eu/dnetlib/dhp/oa/graph/clean/cfhb/masterduplicate.json").toURI()) - .toFile(), - testBaseTmpPath.resolve("workingDir").resolve("masterduplicate").toFile()); + FileUtils + .copyFileToDirectory( + Paths + .get( + CleanCfHbSparkJobTest.class + .getResource("/eu/dnetlib/dhp/oa/graph/clean/cfhb/masterduplicate.json") + .toURI()) + .toFile(), + testBaseTmpPath.resolve("workingDir").resolve("masterduplicate").toFile()); + graphInputPath = testBaseTmpPath.resolve("input").resolve("entities").toString(); + resolvedPath = testBaseTmpPath.resolve("workingDir").resolve("cfHbResolved").toString(); + graphOutputPath = testBaseTmpPath.resolve("workingDir").resolve("cfHbPatched").toString(); + dsMasterDuplicatePath = testBaseTmpPath.resolve("workingDir").resolve("masterduplicate").toString(); - graphInputPath = testBaseTmpPath.resolve("input").resolve("entities").toString(); - resolvedPath = testBaseTmpPath.resolve("workingDir").resolve("cfHbResolved").toString(); - graphOutputPath = testBaseTmpPath.resolve("workingDir").resolve("cfHbPatched").toString(); - dsMasterDuplicatePath = 
testBaseTmpPath.resolve("workingDir").resolve("masterduplicate").toString(); + SparkConf conf = new SparkConf(); + conf.setAppName(CleanCfHbSparkJobTest.class.getSimpleName()); - SparkConf conf = new SparkConf(); - conf.setAppName(CleanCfHbSparkJobTest.class.getSimpleName()); + conf.setMaster("local[*]"); + conf.set("spark.driver.host", "localhost"); + conf.set("spark.ui.enabled", "false"); - conf.setMaster("local[*]"); - conf.set("spark.driver.host", "localhost"); - conf.set("spark.ui.enabled", "false"); + spark = SparkSession + .builder() + .appName(CleanCfHbSparkJobTest.class.getSimpleName()) + .config(conf) + .getOrCreate(); + } - spark = SparkSession - .builder() - .appName(CleanCfHbSparkJobTest.class.getSimpleName()) - .config(conf) - .getOrCreate(); - } + @AfterAll + public static void afterAll() throws IOException { + FileUtils.deleteDirectory(testBaseTmpPath.toFile()); + spark.stop(); + } - @AfterAll - public static void afterAll() throws IOException { - FileUtils.deleteDirectory(testBaseTmpPath.toFile()); - spark.stop(); - } + @Test + void testCleanCfHbSparkJob() throws Exception { + final String outputPath = graphOutputPath + "/dataset"; + final String inputPath = graphInputPath + "/dataset"; - @Test - void testCleanCfHbSparkJob() throws Exception { - final String outputPath = graphOutputPath + "/dataset"; - CleanCfHbSparkJob - .main( - new String[] { - "--isSparkSessionManaged", Boolean.FALSE.toString(), - "--inputPath", graphInputPath + "/dataset", - "--outputPath", outputPath, - "--resolvedPath", resolvedPath + "/dataset", - "--graphTableClassName", Dataset.class.getCanonicalName(), - "--datasourceMasterDuplicate", dsMasterDuplicatePath - }); + org.apache.spark.sql.Dataset records = read(spark, inputPath, Dataset.class); + Dataset d = records + .filter("id = '50|doi_________::09821844208a5cd6300b2bfb13bca1b9'") + .first(); + assertEquals("10|re3data_____::4c4416659cb74c2e0e891a883a047cbc", d.getCollectedfrom().get(0).getKey()); + 
assertEquals("Bacterial Protein Interaction Database - DUP", d.getCollectedfrom().get(0).getValue()); + assertEquals( + "10|re3data_____::4c4416659cb74c2e0e891a883a047cbc", d.getInstance().get(0).getCollectedfrom().getKey()); + assertEquals( + "Bacterial Protein Interaction Database - DUP", d.getInstance().get(0).getCollectedfrom().getValue()); - //final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + d = records + .filter("id = '50|DansKnawCris::0dd644304b7116e8e58da3a5e3adc37a'") + .first(); + assertEquals("10|opendoar____::788b4ac1e172d8e520c2b9461c0a3d35", d.getCollectedfrom().get(0).getKey()); + assertEquals("FILUR DATA - DUP", d.getCollectedfrom().get(0).getValue()); + assertEquals( + "10|opendoar____::788b4ac1e172d8e520c2b9461c0a3d35", d.getInstance().get(0).getCollectedfrom().getKey()); + assertEquals("FILUR DATA - DUP", d.getInstance().get(0).getCollectedfrom().getValue()); + assertEquals( + "10|re3data_____::6ffd7bc058f762912dc494cd9c175341", d.getInstance().get(0).getHostedby().getKey()); + assertEquals("depositar - DUP", d.getInstance().get(0).getHostedby().getValue()); - Assertions.assertTrue(Files.exists(Paths.get(graphOutputPath, "dataset"))); + d = records + .filter("id = '50|DansKnawCris::203a27996ddc0fd1948258e5b7dec61c'") + .first(); + assertEquals("10|openaire____::c6df70599aa984f16ee52b4b86d2e89f", d.getCollectedfrom().get(0).getKey()); + assertEquals("DANS (Data Archiving and Networked Services)", d.getCollectedfrom().get(0).getValue()); + assertEquals( + "10|openaire____::c6df70599aa984f16ee52b4b86d2e89f", d.getInstance().get(0).getCollectedfrom().getKey()); + assertEquals( + "DANS (Data Archiving and Networked Services)", d.getInstance().get(0).getCollectedfrom().getValue()); + assertEquals( + "10|openaire____::c6df70599aa984f16ee52b4b86d2e89f", d.getInstance().get(0).getHostedby().getKey()); + assertEquals("DANS (Data Archiving and Networked Services)", d.getInstance().get(0).getHostedby().getValue()); - 
final org.apache.spark.sql.Dataset d = spark - .read() - .textFile(outputPath) - .map(as(Dataset.class), Encoders.bean(Dataset.class)); - Assertions.assertEquals(3, d.count()); - - } + CleanCfHbSparkJob + .main( + new String[] { + "--isSparkSessionManaged", Boolean.FALSE.toString(), + "--inputPath", inputPath, + "--outputPath", outputPath, + "--resolvedPath", resolvedPath + "/dataset", + "--graphTableClassName", Dataset.class.getCanonicalName(), + "--datasourceMasterDuplicate", dsMasterDuplicatePath + }); - private static MapFunction as(Class clazz) { - return s -> OBJECT_MAPPER.readValue(s, clazz); - } + assertTrue(Files.exists(Paths.get(graphOutputPath, "dataset"))); + + records = read(spark, outputPath, Dataset.class); + + assertEquals(3, records.count()); + + d = records + .filter("id = '50|doi_________::09821844208a5cd6300b2bfb13bca1b9'") + .first(); + assertEquals("10|fairsharing_::a29d1598024f9e87beab4b98411d48ce", d.getCollectedfrom().get(0).getKey()); + assertEquals("Bacterial Protein Interaction Database", d.getCollectedfrom().get(0).getValue()); + assertEquals( + "10|fairsharing_::a29d1598024f9e87beab4b98411d48ce", d.getInstance().get(0).getCollectedfrom().getKey()); + assertEquals("Bacterial Protein Interaction Database", d.getInstance().get(0).getCollectedfrom().getValue()); + + d = records + .filter("id = '50|DansKnawCris::0dd644304b7116e8e58da3a5e3adc37a'") + .first(); + assertEquals("10|re3data_____::fc1db64b3964826913b1e9eafe830490", d.getCollectedfrom().get(0).getKey()); + assertEquals("FULIR Data", d.getCollectedfrom().get(0).getValue()); + assertEquals( + "10|re3data_____::fc1db64b3964826913b1e9eafe830490", d.getInstance().get(0).getCollectedfrom().getKey()); + assertEquals("FULIR Data", d.getInstance().get(0).getCollectedfrom().getValue()); + assertEquals( + "10|fairsharing_::3f647cadf56541fb9513cb63ec370187", d.getInstance().get(0).getHostedby().getKey()); + assertEquals("depositar", d.getInstance().get(0).getHostedby().getValue()); + + d = 
records + .filter("id = '50|DansKnawCris::203a27996ddc0fd1948258e5b7dec61c'") + .first(); + assertEquals("10|openaire____::c6df70599aa984f16ee52b4b86d2e89f", d.getCollectedfrom().get(0).getKey()); + assertEquals("DANS (Data Archiving and Networked Services)", d.getCollectedfrom().get(0).getValue()); + assertEquals( + "10|openaire____::c6df70599aa984f16ee52b4b86d2e89f", d.getInstance().get(0).getCollectedfrom().getKey()); + assertEquals( + "DANS (Data Archiving and Networked Services)", d.getInstance().get(0).getCollectedfrom().getValue()); + assertEquals( + "10|openaire____::c6df70599aa984f16ee52b4b86d2e89f", d.getInstance().get(0).getHostedby().getKey()); + assertEquals("DANS (Data Archiving and Networked Services)", d.getInstance().get(0).getHostedby().getValue()); + + d = records + .filter("id = '50|DansKnawCris::203a27996ddc0fd1948258e5b7dec61c'") + .first(); + assertEquals("10|openaire____::c6df70599aa984f16ee52b4b86d2e89f", d.getCollectedfrom().get(0).getKey()); + assertEquals("DANS (Data Archiving and Networked Services)", d.getCollectedfrom().get(0).getValue()); + assertEquals( + "10|openaire____::c6df70599aa984f16ee52b4b86d2e89f", d.getInstance().get(0).getCollectedfrom().getKey()); + assertEquals( + "DANS (Data Archiving and Networked Services)", d.getInstance().get(0).getCollectedfrom().getValue()); + assertEquals( + "10|openaire____::c6df70599aa984f16ee52b4b86d2e89f", d.getInstance().get(0).getHostedby().getKey()); + assertEquals("DANS (Data Archiving and Networked Services)", d.getInstance().get(0).getHostedby().getValue()); + } + + private org.apache.spark.sql.Dataset read(SparkSession spark, String path, Class clazz) { + return spark + .read() + .textFile(path) + .map(as(clazz), Encoders.bean(clazz)); + } + + private static MapFunction as(Class clazz) { + return s -> OBJECT_MAPPER.readValue(s, clazz); + } } From bb0ddc1c4455a85acf6b896d243192f996835ff0 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 30 Nov 2022 09:56:24 +0100 Subject: 
[PATCH 34/55] [BulkTag] adding verb starts_with --- .../dhp/bulktag/criteria/StartsWithVerb.java | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/StartsWithVerb.java diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/StartsWithVerb.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/StartsWithVerb.java new file mode 100644 index 000000000..f22e1a864 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/StartsWithVerb.java @@ -0,0 +1,30 @@ + +package eu.dnetlib.dhp.bulktag.criteria; + +import java.io.Serializable; + +@VerbClass("starts_with") +public class StartsWithVerb implements Selection, Serializable { + + private String param; + + public StartsWithVerb() { + } + + public StartsWithVerb(final String param) { + this.param = param; + } + + @Override + public boolean apply(String value) { + return value.startsWith(param); + } + + public String getParam() { + return param; + } + + public void setParam(String param) { + this.param = param; + } +} From ce020f2c837207bc7e2f6b9062d56d07196f0cd5 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 30 Nov 2022 09:57:30 +0100 Subject: [PATCH 35/55] [EOSC FUTURE] added resources and test for review --- .../provision/IndexRecordTransformerTest.java | 20 ++++ .../provision/eosc-future/zenodo7351221.xml | 99 +++++++++++++++++ .../provision/eosc-future/zenodo7351393.xml | 100 ++++++++++++++++++ .../provision/eosc-future/zenodo7353841.xml | 85 +++++++++++++++ 4 files changed, 304 insertions(+) create mode 100644 dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/zenodo7351221.xml create mode 100644 dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/zenodo7351393.xml create mode 100644 
dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/zenodo7353841.xml diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java index 17c3cdb30..d9dc609d8 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java @@ -142,6 +142,26 @@ public class IndexRecordTransformerTest { testRecordTransformation(record); } + @Test + public void testForEOSCFutureZenodo7353841() throws IOException, TransformerException { + final String record = IOUtils + .toString(getClass().getResourceAsStream("eosc-future/zenodo7353841.xml")); + testRecordTransformation(record); + } + + @Test + public void testForEOSCFutureZenodo7351393() throws IOException, TransformerException { + final String record = IOUtils + .toString(getClass().getResourceAsStream("eosc-future/zenodo7351393.xml")); + testRecordTransformation(record); + } + + @Test + public void testForEOSCFutureZenodo7351221() throws IOException, TransformerException { + final String record = IOUtils + .toString(getClass().getResourceAsStream("eosc-future/zenodo7351221.xml")); + testRecordTransformation(record); + } @Test void testDoiUrlNormalization() throws MalformedURLException { diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/zenodo7351221.xml b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/zenodo7351221.xml new file mode 100644 index 000000000..7f1e66dac --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/zenodo7351221.xml @@ -0,0 +1,99 @@ + + +
+ doi_________::9cb0664d4c891c4baaf73f007c0c9de0 + 2022-11-25T12:55:13Z + under curation + +
+ + + + + COVID-19 Knowledge Graph: A semantic resource embedding biological and chemical entities + Karki, Reagon + + + +

A Knowledge graph representation of compounds and associated biological entities in the BY-COVID and EOSC Future project.

Current status

  • Number of Nodes: 35952
  • Number of Edges: 279462
  • Human Proteins: 1347
  • Assay: 15835
  • Chemical/Compound: 4096
  • Mechanism of Action: 739
  • Pathway: 1513
  • Disease: 1585
  • SideEffect: 7420
  • Biological Process: 2085
  • Molecular Function: 1332

Please check the BY_COVID_update_August.ipynb for understanding step wise process of KG generation and KG statistics. The KG has been exported to formats such as graphml, sif and so on for visualizations in other platforms. For example, the graphml file can be imported to Cytoscape directly. These files are located in 'data\export' folder.

+ + + + Zenodo + + + + + + + + + + + + + + + + + + + oai:zenodo.org:7351221 + + oai:zenodo.org:7351221 + 10.5281/zenodo.7351221 + + + + false + false + 0.9 + + + + + + corda__h2020::256485716fdb9f5ca69007b7ca5a072b + 101017536 + EOSC Future + EOSC Future + + + + ec__________::EC::H2020 + + + + + corda__h2020::4a3254eac2997eee0a9dcb7a7daedb81 + 101046203 + BY-COVID + Beyond COVID + + + + ec__________::EC::Horizon Europe Framework Programme - HORIZON-RIA\HORIZON Action Grant Budget-Based + + + + + + + + + + + + + https://zenodo.org/record/7351221 + + + +
+
+
+
+
\ No newline at end of file diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/zenodo7351393.xml b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/zenodo7351393.xml new file mode 100644 index 000000000..21b656d2c --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/zenodo7351393.xml @@ -0,0 +1,100 @@ + + +
+ doi_________::07fdccabd77830e3caccf0b33c083f1b + 2022-11-25T01:08:31Z + under curation + +
+ + + + Monkeypox Knowledge Graph: A semantic resource embedding biological and chemical entities + Karki, Reagon + Andrea, Zaliani + Gadiya, Yojana + Gribbon, Philip + + + +

The Monkeypox KG is built using viral and human proteins reported in different resources. Additionally, the KG represents chemicals tested against Monkeypox and their targets, associated biological processes, molecular functions, diseases and side effects.

KG status

Version 1 stats:

  • Number of Nodes: 8235
  • Number of Edges: 40422

Version 2 stats (2nd September) :

  • Number of Nodes: 9129
  • Number of Edges: 44568

Please check the graph.ipynb for understanding step wise process of KG generation and KG statistics. The KG has been exported to formats such as graphml, sif and so on for visualizations in other platforms. For example, the graphml file can be imported to Cytoscape directly. These files are located in 'data\export' folder.

+ + + + Zenodo + + + + + + + + + + + + + + + + + + + oai:zenodo.org:7351393 + + oai:zenodo.org:7351393 + 10.5281/zenodo.7351393 + + + false + false + 0.9 + + + + + + corda__h2020::256485716fdb9f5ca69007b7ca5a072b + 101017536 + EOSC Future + EOSC Future + + + + ec__________::EC::H2020 + + + + + corda__h2020::4a3254eac2997eee0a9dcb7a7daedb81 + 101046203 + BY-COVID + Beyond COVID + + + + ec__________::EC::Horizon Europe Framework Programme - HORIZON-RIA\HORIZON Action Grant Budget-Based + + + + + + + + + + + + + https://zenodo.org/record/7351393 + + + +
+
+
+
+
\ No newline at end of file diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/zenodo7353841.xml b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/zenodo7353841.xml new file mode 100644 index 000000000..4bd287e50 --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/zenodo7353841.xml @@ -0,0 +1,85 @@ + + +
+ doi_________::93d39dd7edef016928788c3500e149f1 + 2022-11-24T08:41:37Z + under curation + +
+ + + + ENVRI SP - Dashboard State of the Environment - Ocean Indicators + Tjerk Krijger + + + +

The attached .yaml file is used as input to the Dashboard State of the Environment, which is a science project of the ENVRI-FAIR science cluster within EOSC-FUTURE. The contents of the file enable the visualization of Ocean indicators on the dashboard. It is possible to download the attached file and change the contents to include indicators from different domains such as atmosphere or biodiversity.

+ + + + Zenodo + + + + + + + + + + + + + + + + + + + oai:zenodo.org:7353841 + + oai:zenodo.org:7353841 + 10.5281/zenodo.7353841 + + + false + false + 0.9 + + + + + + corda__h2020::256485716fdb9f5ca69007b7ca5a072b + 101017536 + EOSC Future + EOSC Future + + + + ec__________::EC::H2020 + + + + + + + + + + + + + https://zenodo.org/record/7353841 + + + +
+
+
+
+
From 062abfd669fbb70ede939ba0e65fa44a59ef49ac Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Tue, 6 Dec 2022 12:04:00 +0100 Subject: [PATCH 36/55] fixed NPE, removed unused stuff --- .../java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java index 82bf87cca..7637cde93 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java @@ -94,7 +94,6 @@ public class DedupRecordFactory { BeanUtils.copyProperties(entity, first); - final Collection dates = Lists.newArrayList(); final List> authors = Lists.newArrayList(); entityList @@ -103,16 +102,14 @@ public class DedupRecordFactory { entity.mergeFrom(duplicate); if (ModelSupport.isSubClass(duplicate, Result.class)) { Result r1 = (Result) duplicate; - if (r1.getAuthor() != null && StringUtils.isNotBlank(r1.getDateofacceptance().getValue())) - authors.add(r1.getAuthor()); - if (r1.getDateofacceptance() != null) - dates.add(r1.getDateofacceptance().getValue()); + Optional + .ofNullable(r1.getAuthor()) + .ifPresent(a -> authors.add(a)); } }); // set authors and date if (ModelSupport.isSubClass(entity, Result.class)) { - // ((Result) entity).setDateofacceptance(DatePicker.pick(dates)); ((Result) entity).setAuthor(AuthorMerger.merge(authors)); } From cd1b58483ec457cd5624bf176c71e8ff45af9e2a Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 7 Dec 2022 10:38:42 +0100 Subject: [PATCH 37/55] [bulk tag] fixed Community configuration parsing to void NPE --- .../CommunityConfigurationFactory.java | 6 +- .../CommunityConfigurationFactoryTest.java | 32 + .../community_configuration_selcrit2.xml | 4459 +++++++++++++++++ 3 
files changed, 4494 insertions(+), 3 deletions(-) create mode 100644 dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag/communityconfiguration/community_configuration_selcrit2.xml diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfigurationFactory.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfigurationFactory.java index 52ca606fc..ae1deaa73 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfigurationFactory.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfigurationFactory.java @@ -90,12 +90,12 @@ public class CommunityConfigurationFactory { } private static SelectionConstraints parseConstrains(Node node) { - Node aconstraints = node.selectSingleNode("./advancedConstraints"); - if (aconstraints == null) { + Node advConstsNode = node.selectSingleNode("./advancedConstraints"); + if (advConstsNode == null || StringUtils.isBlank(StringUtils.trim(advConstsNode.getText()))) { return null; } SelectionConstraints selectionConstraints = new Gson() - .fromJson(aconstraints.getText(), SelectionConstraints.class); + .fromJson(advConstsNode.getText(), SelectionConstraints.class); selectionConstraints.setSelection(resolver); return selectionConstraints; diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/CommunityConfigurationFactoryTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/CommunityConfigurationFactoryTest.java index 861546adb..e3a660835 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/CommunityConfigurationFactoryTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/CommunityConfigurationFactoryTest.java @@ -83,4 +83,36 @@ class CommunityConfigurationFactoryTest { Assertions.assertEquals("dariah", comm.get(0)); } + @Test + void 
loadSelCriteriaTest2() throws DocumentException, IOException, SAXException { + String xml = IOUtils + .toString( + getClass() + .getResourceAsStream( + "/eu/dnetlib/dhp/bulktag/communityconfiguration/community_configuration_selcrit2.xml")); + final CommunityConfiguration cc = CommunityConfigurationFactory.newInstance(xml); + Map> param = new HashMap<>(); + param.put("author", new ArrayList<>(Collections.singletonList("Pippo Pippi"))); + param + .put( + "description", + new ArrayList<>( + Collections + .singletonList( + "This work has been partially supported by DARIAH-EU infrastructure"))); + param + .put( + "contributor", + new ArrayList<>( + Collections + .singletonList( + "Author X helped to write the paper. X works for DARIAH"))); + List comm = cc + .getCommunityForDatasource( + "openaire____::1cfdb2e14977f31a98e0118283401f32", param); + + //TODO add more assertions + Assertions.assertEquals(0, comm.size()); + } + } diff --git a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag/communityconfiguration/community_configuration_selcrit2.xml b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag/communityconfiguration/community_configuration_selcrit2.xml new file mode 100644 index 000000000..b2b62e314 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag/communityconfiguration/community_configuration_selcrit2.xml @@ -0,0 +1,4459 @@ + + + + + + + orp_________::9b49c76cf40a93f89944889678b741f9 + + + + + + egi + + + + + + + + + Ocean mapping + Multibeam Backscatter + Bathymetry + Seabed classification + Submarine Geomorphology + Underwater Photogrammetry + + + + issn___print::aedc6f1e34247e464bc8b45270e4b972 + 
{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"bathymetry"}]},{"constraint":[{"verb":"contains_caseinsensitive","field":"description","value":"bathymetry"}]},{"constraint":[{"verb":"contains_caseinsensitive","field":"description","value":"backscatter"}]},{"constraint":[{"verb":"contains_caseinsensitive","field":"description","value":"photogrammetry"},{"verb":"contains_caseinsensitive","field":"description","value":"underwater"}]},{"constraint":[{"verb":"contains_caseinsensitive","field":"description","value":"multibeam"}]},{"constraint":[{"verb":"contains_caseinsensitive","field":"description","value":"bathymetric"}]}]} + + + issn___print::ed5a370152cb6cafc3d5bff280a94c88 + {"criteria":[{"constraint":[{"verb":"equals_caseinsensitive","field":"title","value":"submarine geomorphology"}]},{"constraint":[{"verb":"contains_caseinsensitive","field":"description","value":"bathymetry"}]},{"constraint":[{"verb":"contains_caseinsensitive","field":"description","value":"marine"}]},{"constraint":[{"verb":"contains_caseinsensitive","field":"description","value":"photogrammetry"},{"verb":"contains_caseinsensitive","field":"description","value":"underwater"}]},{"constraint":[{"verb":"contains_caseinsensitive","field":"description","value":"backscatter"}]},{"constraint":[{"verb":"contains_caseinsensitive","field":"description","value":"multibeam"}]},{"constraint":[{"verb":"contains_caseinsensitive","field":"description","value":"bathymetric"}]}]} + + + opendoar____::99f59c0842e83c808dd1813b48a37c6a + 
{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"description","value":"bathymetry"}]},{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"bathymetry"}]},{"constraint":[{"verb":"contains_caseinsensitive","field":"description","value":"photogrammetry"},{"verb":"contains_caseinsensitive","field":"description","value":"underwater"}]},{"constraint":[{"verb":"contains_caseinsensitive","field":"description","value":"backscatter"}]},{"constraint":[{"verb":"contains_caseinsensitive","field":"description","value":"multibeam"}]},{"constraint":[{"verb":"contains_caseinsensitive","field":"description","value":"bathymetric"}]},{"constraint":[{"verb":"contains_caseinsensitive","field":"description","value":"bathymetric"}]}]} + + + opendoar____::8f14e45fceea167a5a36dedd4bea2543 + {"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"description","value":"bathymetry"}]},{"constraint":[{"verb":"contains_caseinsensitive","field":"description","value":"backscatter"}]},{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"bathymetry"}]},{"constraint":[{"verb":"contains_caseinsensitive","field":"description","value":"photogrammetry"},{"verb":"contains_caseinsensitive","field":"description","value":"underwater"}]},{"constraint":[{"verb":"contains_caseinsensitive","field":"description","value":"multibeam"}]},{"constraint":[{"verb":"contains_caseinsensitive","field":"description","value":"bathymetric"}]}]} + + + issn___print::d143b63af2b83be033d45d82f741761b + 
{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"description","value":"bathymetry"}]},{"constraint":[{"verb":"contains_caseinsensitive","field":"description","value":"backscatter"}]},{"constraint":[{"verb":"contains_caseinsensitive","field":"description","value":"photogrammetry"},{"verb":"contains_caseinsensitive","field":"description","value":"underwater"},{"verb":"contains_caseinsensitive","field":"description","value":"marine"}]},{"constraint":[{"verb":"contains_caseinsensitive","field":"description","value":"underwater"}]},{"constraint":[{"verb":"contains_caseinsensitive","field":"description","value":"marine"}]}]} + + + issn___print::780e93f1de06f8b0eb4e98606dfc7197 + {"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"description","value":"underwater"}]},{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"bathymetric"}]},{"constraint":[{"verb":"contains_caseinsensitive","field":"description","value":"backscatter"}]},{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"bathymetry"}]},{"constraint":[{"verb":"contains_caseinsensitive","field":"description","value":"photogrammetry"},{"verb":"contains_caseinsensitive","field":"description","value":"underwater"}]},{"constraint":[{"verb":"contains_caseinsensitive","field":"description","value":"multibeam"}]}]} + + + doajarticles::be08e041165577d36a805b13cd7d2ba7 + 
{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"underwater"}]},{"constraint":[{"verb":"contains_caseinsensitive","field":"description","value":"bathymetry"}]},{"constraint":[{"verb":"contains_caseinsensitive","field":"description","value":"backscatter"}]},{"constraint":[{"verb":"contains_caseinsensitive","field":"description","value":"photogrammetry"},{"verb":"contains_caseinsensitive","field":"description","value":"underwater"}]},{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"submarine"}]},{"constraint":[{"verb":"contains_caseinsensitive","field":"description","value":"multibeam"}]}]} + + + opendoar____::ea119a40c1592979f51819b0bd38d39d + {"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"description","value":"bathymetry"}]},{"constraint":[{"verb":"contains_caseinsensitive","field":"description","value":"backscatter"}]},{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"submarine"}]},{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"underwater"}]},{"constraint":[{"verb":"contains_caseinsensitive","field":"description","value":"backscatter"}]},{"constraint":[{"verb":"contains_caseinsensitive","field":"description","value":"photogrammetry"},{"verb":"contains_caseinsensitive","field":"description","value":"underwater"}]}]} + + + doajarticles::795b5787c6d1515629042fd2f398dd0e + 
{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"submarine"}]},{"constraint":[{"verb":"contains_caseinsensitive","field":"description","value":"bathymetry"}]},{"constraint":[{"verb":"contains_caseinsensitive","field":"description","value":"backscatter"}]},{"constraint":[{"verb":"contains_caseinsensitive","field":"description","value":"underwater"}]},{"constraint":[{"verb":"contains_caseinsensitive","field":"description","value":"photogrammetry"},{"verb":"contains_caseinsensitive","field":"description","value":"underwater"}]}]} + + + issn___print::bfcfaf0f496b008f5ab4381976a78743 + {"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"submarine"}]},{"constraint":[{"verb":"contains_caseinsensitive","field":"description","value":"bathymetry"}]},{"constraint":[{"verb":"contains_caseinsensitive","field":"description","value":"bathymetric"}]},{"constraint":[{"verb":"contains_caseinsensitive","field":"description","value":"underwater"}]},{"constraint":[{"verb":"contains_caseinsensitive","field":"description","value":"marine"}]},{"constraint":[{"verb":"contains_caseinsensitive","field":"description","value":"multibeam"}]},{"constraint":[{"verb":"contains_caseinsensitive","field":"description","value":"backscatter"}]},{"constraint":[{"verb":"contains_caseinsensitive","field":"description","value":"photogrammetry"},{"verb":"contains_caseinsensitive","field":"description","value":"underwater"}]}]} + + + re3data_____::9633d1e8c4309c833c2c442abeb0cfeb + 
{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"bathymetry"}]},{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"multibeam"}]},{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"backscatter"}]},{"constraint":[{"verb":"contains_caseinsensitive","field":"description","value":"bathymetry"}]},{"constraint":[{"verb":"contains_caseinsensitive","field":"description","value":"backscatter"}]}]} + + + + + obsea + + + + openearth + + + + bermudabream + + + + marxiv + + + + meditmarsci + + + + sail + + + + greenmardata + + + + bluecloud + + + + emso-eric + + + + atlas + + + + medship-goship + + + + + + + + + + + + re3data_____::a507cdacc5bbcc08761c92185dee5cab + + + + + + oac_clarin + + + + + + + + + + + + rda-related + + + rda + + + + + + + + + + SDG13 - Climate action + SDG8 - Decent work and economic + growth + SDG15 - Life on land + SDG2 - Zero hunger + SDG17 - Partnerships for the + goals + SDG10 - Reduced inequalities + SDG5 - Gender equality + SDG12 - Responsible + consumption and production + SDG14 - Life below water + SDG6 - Clean water and + sanitation + SDG11 - Sustainable cities and communities + SDG1 - No poverty + SDG3 - + Good health and well being + SDG7 - Affordable and clean energy + SDG4 - Quality + education + SDG9 - Industry innovation and infrastructure + SDG16 - Peace justice + and strong institutions + + + + + oac_sdsn-greece + + + + + + + + + modern art + monuments + europeana data model + field walking + frescoes + LIDO metadata schema + art history + excavation + Arts and Humanities General + coins + temples + numismatics + lithics + environmental archaeology + digital cultural heritage + archaeological reports + history + CRMba + churches + cultural heritage + archaeological stratigraphy + religious art + digital humanities + archaeological sites + linguistic studies + bioarchaeology + architectural orders + palaeoanthropology + fine arts + europeana + CIDOC CRM 
+ decorations + classic art + stratigraphy + digital archaeology + intangible cultural heritage + walls + chapels + CRMtex + Language and Literature + paintings + archaeology + mosaics + burials + medieval art + castles + CARARE metadata schema + statues + natural language processing + inscriptions + CRMsci + vaults + contemporary art + Arts and Humanities + CRMarchaeo + pottery + + + + re3data_____::9ebe127e5f3a0bf401875690f3bb6b81 + + + + doajarticles::c6cd4b532e12868c1d760a8d7cda6815 + + + + doajarticles::a6de4499bb87bf3c01add0a9e2c9ed0b + + + + doajarticles::6eb31d13b12bc06bbac06aef63cf33c9 + + + + doajarticles::0da84e9dfdc8419576169e027baa8028 + + + + re3data_____::a48f09c562b247a9919acfe195549b47 + + + + doajarticles::2899208a99aa7d142646e0a80bfeef05 + + + + issn___print::9c2ffbdfdc48f6df69a6a3e6a280fae1 + + + + issn___print::72d240a2d51f2c2d77326e010cf22b17 + + + + issn__online::92e1d78d81161cd33f11e8224a57c8b9 + + + + issn___print::0a35021b9120f06d707698476b7e1882 + + + + issn__online::fb8a8d9129fd90ab209688cf42f9baa7 + + + + re3data_____::69162d0a40bab7cc80b40ec90da874b9 + + + + doajarticles::cdccbf32233ba16782875c757ababf11 + + + + doajarticles::b451f47924a84a65fa51bc82516993ce + + + + openaire____::ec63975d4da67750eede546e4aac955c + + + + re3data_____::2201b9b181a9f8361b3d3fbf496d18d7 + + + + issn___print::9519433d216b7ef0c292a355a99d0fb7 + + + + issn___print::7e7ed6b3beb5404bbd9354b6809fc903 + + + + doajarticles::e7d82763b1b210e769fd10d6648504ea + + + + issn___print::b8cdab13c697a46c6c1c243d57bce3fe + + + + re3data_____::f52792889d64d1a688b43ed989f6464a + + + + re3data_____::a507cdacc5bbcc08761c92185dee5cab + + + + re3data_____::fe0d76581a60e1287a93e2ed2cb29339 + + + + re3data_____::d9b253653defb9c50dbf5eed44f68c73 + + + + re3data_____::b47b4a53d7ad3c7d1e60d5a6fbb8c640 + + + + re3data_____::0d863103c89be7344eb200475aca0647 + + + + re3data_____::afc322a1da36b057851bbd025e9b0115 + + + + + + oac_dh-ch + + + storm + + + + crosscult + + + + wholodance_eu + + 
+ + digcur2013 + + + + gravitate + + + + dipp2014 + + + + digitalhumanities + + + + dimpo + + + + adho + + + + chc + + + + wahr + + + + ibe + + + + ariadne + + + + parthenos-hub + + + + parthenos-training + + + + gandhara + + + + cmsouthasia + + + + nilgirihills + + + + shamsa_mustecio + + + + bodhgaya + + + + + + + + + + brain mapping + brain imaging + electroencephalography + arterial spin labelling + brain fingerprinting + brain + neuroimaging + Multimodal Brain Image Analysis + fMRI + neuroinformatics + fetal brain + brain ultrasonic imaging + topographic brain mapping + diffusion tensor imaging + computerized knowledge assessment + connectome mapping + brain magnetic resonance imaging + brain abnormalities + + + + re3data_____::5b9bf9171d92df854cf3c520692e9122 + + + + doajarticles::c7d3de67dc77af72f6747157441252ec + + + + re3data_____::8515794670370f49c1d176c399c714f5 + + + + doajarticles::d640648c84b10d425f96f11c3de468f3 + + + + doajarticles::0c0e74daa5d95504eade9c81ebbd5b8a + + + + rest________::fb1a3d4523c95e63496e3bc7ba36244b + + + + issn___print::3a700379401534d8f70b632d72d05f9c + + + + + + oac_ni + + + neuroinformatics + + + + hbp + + + + from_neuroscience_to_machine_learning + + + + ci2c + + + + opensourcebrain + + + + brainspeak + + + + braincom + + + + nextgenvis + + + + meso-brain + + + + neuroplasticity-workshop + + + + bionics + + + + brainmattrain-676408 + + + + repronim + + + + affectiveneuro + + + + con + + + + lab_neurol_sperim_irfmn_irccs_milano_it + + + + + + + + + + marine + ocean + fish + aqua + sea + + + + opendoar____::8f14e45fceea167a5a36dedd4bea2543 + + + + opendoar____::ea119a40c1592979f51819b0bd38d39d + + + + opendoar____::35d02fef7d9a24e237057162abab82b7 + + + + doajarticles::e5934203928262dca69c4fc3e3e95ead + + + + issn___print::90b2464d021b3b394af968bc90ef660b + + + + openaire____::c739f47b413c0ca5f8577b9f6a5c35ce + + + + + + oac_mes + + + adriplan + + + + devotes-project + + + + euro-basin + + + + naclim + + + + discardless + + + 
+ assisibf + + + + meece + + + + facts + + + + proeel + + + + aquatrace + + + + myfish + + + + atlas + + + + blue-actionh2020 + + + + sponges + + + + merces_project + + + + bigdataocean + + + + columbus + + + + h2020-aquainvad-ed + + + + aquarius + + + + southern-ocean-observing-system + + + + eawag + + + + mossco + + + + onc + + + + oceanbiogeochemistry + + + + oceanliteracy + + + + openearth + + + + ocean + + + + calcifierraman + + + + bermudabream + + + + brcorp1 + + + + mce + + + + biogeochem + + + + ecc2014 + + + + fisheries + + + + sedinstcjfas + + + + umr-entropie + + + + farfish2020 + + + + primefish + + + + zf-ilcs + + + + climefish + + + + afrimed_eu + + + + spi-ace + + + + cice-consortium + + + + nemo-ocean + + + + mesopp-h2020 + + + + marxiv + + + + + + + + + + + + + oac_instruct + + + instruct + + + + west-life + + + + + + + + + + + + + oaa_elixir-gr + + + + + + + + + + + opendoar____::7e7757b1e12abcb736ab9a754ffb617a + {"criteria":[{"constraint":[{"verb":"contains","field":"contributor","value":"DARIAH"}]}]} + + + opendoar____::96da2f590cd7246bbde0051047b0d6f7 + {"criteria":[{"constraint":[{"verb":"contains","field":"contributor","value":"DARIAH"}]}]} + + + re3data_____::f52792889d64d1a688b43ed989f6464a + {"criteria":[{"constraint":[{"verb":"equals","field":"contributor","value":"Architrave"}]},{"constraint":[{"verb":"equals","field":"contributor","value":"Fontane Notizbücher"}]},{"constraint":[{"verb":"equals","field":"contributor","value":"FreiDi"}]},{"constraint":[{"verb":"equals","field":"contributor","value":"Virtuelles Skriptorium St. 
Matthias"}]},{"constraint":[{"verb":"equals","field":"contributor","value":"ePoetics"}]}]} + + + + + dariah + + + dimpo + + + + + + + + + + + + + risis + + + + + + + + + + + + + + + + + Green Transport + City mobility systems + Vulnerable road users + Traffic engineering + Transport electrification + Intermodal freight transport + Clean vehicle fleets + Intelligent mobility + Inflight refueling + District mobility systems + Navigation and control systems for optimised planning and routing + European Space Technology Platform + European Transport networks + Green cars + Inter-modality infrastructures + Advanced Take Off and Landing Ideas + Sustainable urban systems + port-area railway networks + Innovative forms of urban transport + Alliance for Logistics Innovation through Collaboration in Europe + Advisory Council for Aeronautics Research in Europe + Mobility services for people and goods + Guidance and traffic management + Passenger mobility + Smart mobility and services + transport innovation + high-speed railway + Vehicle design + Inland shipping + public transportation + aviation’s climate impact + Road transport + On-demand public transport + Personal Air Transport + Pipeline transport + European Association of Aviation Training and Education Organisations + Defrosting of railway infrastructure + Inclusive and affordable transport + River Information Services + jel:L92 + Increased use of public transport + Seamless mobility + STRIA + trolleybus transport + Intelligent Transport System + Low-emission alternative energy for transport + Shared mobility for people and goods + Business model for urban mobility + Interoperability of transport systems + Cross-border train slot booking + Air transport + Transport pricing + Sustainable transport + European Rail Transport Research Advisory Council + Alternative aircraft configurations + Railways applications + urban transport + Environmental impact of transport + urban freight delivery systems + Automated Road 
Transport + Alternative fuels in public transport + Active LIDAR-sensor for GHG-measurements + Autonomous logistics operations + Rational use of motorised transport + Network and traffic management systems + electrification of railway wagons + Single European Sky + Electrified road systems + Railway dynamics + Motorway of the Sea + smart railway communications + Maritime transport + Environmental- friendly transport + Combined transport + Connected automated driving technology + Innovative freight logistics services + automated and shared vehicles + Alternative Aircraft Systems + Land-use and transport interaction + Public transport system + Business plan for shared mobility + Shared mobility + Growing of mobility demand + European Road Transport Research Advisory Council + WATERBORNE ETP + Effective transport management system + Short Sea Shipping + air traffic management + Sea hubs and the motorways of the sea + Urban mobility solutions + Smart city planning + Maritime spatial planning + EUropean rail Research Network of Excellence + ENERGY CONSUMPTION BY THE TRANSPORT SECTOR + Integrated urban plan + inland waterway services + European Conference of Transport Research Institutes + air vehicles + E-freight + Automated Driving + Automated ships + pricing for cross-border passenger transport + Vehicle efficiency + Railway transport + Electric vehicles + Road traffic monitoring + Deep sea shipping + Circular economy in transport + Traffic congestion + air transport system + Urban logistics + Rail transport + OpenStreetMap + high speed rail + Transportation engineering + Intermodal travel information + Flight Data Recorders + Advanced driver assistance systems + long distance freight transport + Inland waterway transport + Smart mobility + Mobility integration + Personal Rapid Transit system + Safety measures & requirements for roads + Green rail transport + Vehicle manufacturing + Future Airport Layout + Rail technologies + European Intermodal Research Advisory 
Council + inland navigation + Automated urban vehicles + ECSS-standards + Traveller services + Polluting transport + Air Traffic Control + Cooperative and connected and automated transport + Innovative powertrains + Quality of transport system and services + door-to- door logistics chain + Inter-modal aspects of urban mobility + Innovative freight delivery systems + urban freight delivery infrastructures + + + + doajarticles::1c5bdf8fca58937894ad1441cca99b76 + + + + doajarticles::b37a634324a45c821687e6e80e6f53b4 + + + + doajarticles::4bf64f2a104040e4e055cd9594b2d77c + + + + doajarticles::479ca537c12755d1868bbf02938a900c + + + + doajarticles::55f31df96a60e2309f45b7c265fcf7a2 + + + + doajarticles::c52a09891a5301f9986ebbfe3761810c + + + + doajarticles::379807bc7f6c71a227ef1651462c414c + + + + doajarticles::36069db531a00b85a2e8fb301f4bdc19 + + + + doajarticles::b6a898da311ded96fabf49c520b80d5d + + + + doajarticles::d0753d9180b35a271d8b4a31f449749f + + + + doajarticles::172050a92511838393a3fe237ae47e31 + + + + doajarticles::301ed96c62abb160a3e29796efe5c95c + + + + doajarticles::0f4f805b3d842f2c7f1b077c3426fa59 + + + + doajarticles::ba73728b84437b8d48ae287b867c7215 + + + + doajarticles::86faef424d804309ccf45f692523aa48 + + + + doajarticles::73bd758fa41671de70964c3ecba013af + + + + doajarticles::e661fc0bdb24af42b740a08f0ddc6cf4 + + + + doajarticles::a6d3052047d5dbfbd43d95b4afb0f3d7 + + + + doajarticles::ca61df07089acc53a1569bde6673d82a + + + + doajarticles::237dd6f1606600459d0297abd8ed9976 + + + + doajarticles::fba6191177ede7c51ea1cdf58eae7f8b + + + + opendoar____::65a31da7ede4dc9b03fb5bbf8f442ce9 + + + + issn___print::5620fb341e160a153c489fcfab8a01ce + + + + + + be-open-transport + + + jsdtl + + + + utc-martrec + + + + utc-uti + + + + stp + + + + c2smart + + + + stride-utc + + + + crowd4roads + + + + lemo + + + + imov3d + + + + tra2018 + + + + optimum + + + + stars + + + + iecteim + + + + iccpt2019 + + + + + + + + + + citizen science + + + + + + + + + + 
Sustainability-oriented science policy + STI policies + science—society relations + Science & Technology Policy + Innovation policy + science policy + Policy and Law + + + + doajarticles::c6f0ed5fa41e98863e7c73501fe4bd6d + + + + doajarticles::ae4c7286c79590f19fdca670156ce816 + + + + doajarticles::0f664bce92ce953e0c7a92068c46bfb3 + + + + doajarticles::00017183dc4c858fb77541985323a4ef + + + + doajarticles::93b306f458cce3d7aaaf58c0a725f4f9 + + + + doajarticles::9dbf8fbf3e9fe0fe1fc01e55fbd90bfc + + + + doajarticles::a2bda8785c863279bba4b8f34827b4c9 + + + + doajarticles::019a1fcb42c3fea1c1b689df76330b58 + + + + doajarticles::0daa8281938831e9c82bfed8b55a2975 + + + + doajarticles::f67ad6d268162079b3abd51a24468744 + + + + doajarticles::c6f0ed5fa41e98863e7c73501fe4bd6d + + + + doajarticles::ad114356e196a4a3d84dda59c720dacd + + + + doajarticles::01e8a54fdecaaf354c67a2dd74ae7d4f + + + + doajarticles::449305f096b10a9464449ff2d0e10e06 + + + + doajarticles::982c0c0ac378256254cce2fa6572bb6c + + + + doajarticles::49d6ed47138884566ce93cf0ccb12c02 + + + + doajarticles::a98e820dbc2e8ee0fc84ab66f263267c + + + + doajarticles::50b1ce37427b36368f8f0f1317e47f83 + + + + doajarticles::f0ec29b7450b2ac5d0ad45327eeb531a + + + + doajarticles::d8d421d3b0349a7aaa93758b27a54e84 + + + + doajarticles::7ffc35ac5133da01d421ccf8af5b70bc + + + + + + risis + + + + + + + + + + COVID19 + SARS-CoV + HCoV-19 + mesh:C000657245 + MERS-CoV + Síndrome Respiratorio Agudo Severo + mesh:COVID-19 + COVID2019 + COVID-19 + SARS-CoV-2 + 2019 novel coronavirus + severe acute respiratory syndrome coronavirus 2 + Orthocoronavirinae + Coronaviridae + mesh:D045169 + coronavirus + SARS + coronaviruses + coronavirus disease-19 + sars cov 2 + Middle East Respiratory Syndrome + Severe acute respiratory syndrome coronavirus 2 + Severe Acute Respiratory Syndrome + coronavirus disease 2019 + 2019-nCoV + fos1 + fos2 + sdg1 + sdg2 + + + + opendoar____::358aee4cc897452c00244351e4d91f69 + 
{"criteria":[{"constraint":[{"verb":"contains","field":"title","value":"COVID-19"}]}, + {"constraint":[{"verb":"contains","field":"title","value":"SARS-CoV-2"}]}, + {"constraint":[{"verb":"contains","field":"title","value":"2019-nCoV"}]}]} + + + + re3data_____::7b0ad08687b2c960d5aeef06f811d5e6 + {"criteria":[{"constraint":[{"verb":"contains","field":"title","value":"COVID-19"}]}, + {"constraint":[{"verb":"contains","field":"title","value":"SARS-CoV-2"}]}, + {"constraint":[{"verb":"contains","field":"title","value":"2019-nCoV"}]}]} + + + + driver______::bee53aa31dc2cbb538c10c2b65fa5824 + {"criteria":[{"constraint":[{"verb":"contains","field":"title","value":"COVID-19"}]}, + {"constraint":[{"verb":"contains","field":"title","value":"SARS-CoV-2"}]}, + {"constraint":[{"verb":"contains","field":"title","value":"2019-nCoV"}]}]} + + + + openaire____::437f4b072b1aa198adcbc35910ff3b98 + {"criteria":[{"constraint":[{"verb":"contains","field":"title","value":"COVID-19"}]}, + {"constraint":[{"verb":"contains","field":"title","value":"SARS-CoV-2"}]}, + {"constraint":[{"verb":"contains","field":"title","value":"2019-nCoV"}]}]} + + + + openaire____::081b82f96300b6a6e3d282bad31cb6e2 + {"criteria":[{"constraint":[{"verb":"contains","field":"title","value":"COVID-19"}]}, + {"constraint":[{"verb":"contains","field":"title","value":"SARS-CoV-2"}]}, + {"constraint":[{"verb":"contains","field":"title","value":"2019-nCoV"}]}]} + + + + openaire____::9e3be59865b2c1c335d32dae2fe7b254 + {"criteria":[{"constraint":[{"verb":"contains","field":"title","value":"COVID-19"}]}, + {"constraint":[{"verb":"contains","field":"title","value":"SARS-CoV-2"}]}, + {"constraint":[{"verb":"contains","field":"title","value":"2019-nCoV"}]}]} + + + + opendoar____::8b6dd7db9af49e67306feb59a8bdc52c + 
{"criteria":[{"constraint":[{"verb":"contains","field":"title","value":"COVID-19"}]},{"constraint":[{"verb":"contains","field":"title","value":"SARS-CoV-2"}]},{"constraint":[{"verb":"contains","field":"title","value":"2019-nCoV"}]},{"constraint":[{"verb":"contains","field":"title","value":"2019nCoV"}]},{"constraint":[{"verb":"contains","field":"title","value":"coronavirus"}]},{"constraint":[{"verb":"contains","field":"title","value":"Corona virus"}]},{"constraint":[{"verb":"contains","field":"title","value":"corona-virus"}]},{"constraint":[{"verb":"contains","field":"title","value":"corona viruses"}]},{"constraint":[{"verb":"contains","field":"title","value":"coronaviruses"}]},{"constraint":[{"verb":"contains","field":"title","value":"SARS-CoV"}]},{"constraint":[{"verb":"contains","field":"title","value":"Orthocoronavirinae"}]},{"constraint":[{"verb":"contains","field":"title","value":"MERS-CoV"}]},{"constraint":[{"verb":"contains","field":"title","value":"Severe Acute Respiratory Syndrome"}]},{"constraint":[{"verb":"contains","field":"title","value":"Middle East Respiratory Syndrome"}]},{"constraint":[{"verb":"contains","field":"title","value":"SARS"},{"verb":"contains","field":"title","value":"virus"}]},{"constraint":[{"verb":"contains","field":"title","value":"soluble ACE2"}]},{"constraint":[{"verb":"contains","field":"title","value":"ACE2"},{"verb":"contains","field":"title","value":"virus"}]},{"constraint":[{"verb":"contains","field":"title","value":"ARDS"},{"verb":"contains","field":"title","value":"virus"}]},{"constraint":[{"verb":"contains","field":"title","value":"virus"},{"verb":"contains","field":"title","value":"angiotensin-converting enzyme 2"}]},{"constraint":[{"verb":"contains","field":"description","value":"angiotensin-converting enzyme 
2"},{"verb":"contains","field":"description","value":"virus"}]},{"constraint":[{"verb":"contains","field":"description","value":"ARDS"},{"verb":"contains","field":"description","value":"virus"}]},{"constraint":[{"verb":"contains","field":"description","value":"soluble ACE2"}]},{"constraint":[{"verb":"contains","field":"description","value":"ACE2"},{"verb":"contains","field":"description","value":"virus"}]},{"constraint":[{"verb":"contains","field":"description","value":"SARS"},{"verb":"contains","field":"description","value":"virus"}]},{"constraint":[{"verb":"contains","field":"description","value":"Orthocoronavirinae"}]},{"constraint":[{"verb":"contains","field":"description","value":"Severe Acute Respiratory Syndrome"}]},{"constraint":[{"verb":"contains","field":"description","value":"Middle East Respiratory Syndrome"}]}]} + + + share_______::4719356ec8d7d55d3feb384ce879ad6c + {"criteria":[{"constraint":[{"verb":"contains","field":"title","value":"COVID-19"}]}, + {"constraint":[{"verb":"contains","field":"title","value":"SARS-CoV-2"}]}, + {"constraint":[{"verb":"contains","field":"title","value":"2019-nCoV"}]}]} + + + + share_______::bbd802baad85d1fd440f32a7a3a2c2b1 + {"criteria":[{"constraint":[{"verb":"contains","field":"title","value":"COVID-19"}]}, + {"constraint":[{"verb":"contains","field":"title","value":"SARS-CoV-2"}]}, + {"constraint":[{"verb":"contains","field":"title","value":"2019-nCoV"}]}]} + + + + opendoar____::6f4922f45568161a8cdf4ad2299f6d23 + {"criteria":[{"constraint":[{"verb":"contains","field":"title","value":"COVID-19"}]}, + {"constraint":[{"verb":"contains","field":"title","value":"SARS-CoV-2"}]}, + {"constraint":[{"verb":"contains","field":"title","value":"2019-nCoV"}]}]} + + + + re3data_____::7980778c78fb4cf0fab13ce2159030dc + 
{"criteria":[{"constraint":[{"verb":"contains","field":"title","value":"SARS-CoV-2"}]},{"constraint":[{"verb":"contains","field":"title","value":"COVID-19"}]},{"constraint":[{"verb":"contains","field":"title","value":"2019-nCov"}]}]} + + + re3data_____::978378def740bbf2bfb420de868c460b + {"criteria":[{"constraint":[{"verb":"contains","field":"title","value":"SARS-CoV-2"}]},{"constraint":[{"verb":"contains","field":"title","value":"COVID-19"}]},{"constraint":[{"verb":"contains","field":"title","value":"2019-nCov"}]}]} + + + opendoar____::eda80a3d5b344bc40f3bc04f65b7a357 + {"criteria":[{"constraint":[{"verb":"contains","field":"title","value":"COVID-19"}]},{"constraint":[{"verb":"contains","field":"title","value":"SARS-CoV-2"}]},{"constraint":[{"verb":"contains","field":"title","value":"2019-nCoV"}]},{"constraint":[{"verb":"contains","field":"title","value":"2019nCoV"}]},{"constraint":[{"verb":"contains","field":"title","value":"coronavirus"}]},{"constraint":[{"verb":"contains","field":"title","value":"Corona virus"}]},{"constraint":[{"verb":"contains","field":"title","value":"corona-virus"}]},{"constraint":[{"verb":"contains","field":"title","value":"corona viruses"}]},{"constraint":[{"verb":"contains","field":"title","value":"coronaviruses"}]},{"constraint":[{"verb":"contains","field":"title","value":"SARS-CoV"}]},{"constraint":[{"verb":"contains","field":"title","value":"Orthocoronavirinae"}]},{"constraint":[{"verb":"contains","field":"title","value":"MERS-CoV"}]},{"constraint":[{"verb":"contains","field":"title","value":"Severe Acute Respiratory Syndrome"}]},{"constraint":[{"verb":"contains","field":"title","value":"Middle East Respiratory Syndrome"}]},{"constraint":[{"verb":"contains","field":"title","value":"SARS"},{"verb":"contains","field":"title","value":"virus"}]},{"constraint":[{"verb":"contains","field":"title","value":"soluble 
ACE2"}]},{"constraint":[{"verb":"contains","field":"title","value":"ACE2"},{"verb":"contains","field":"title","value":"virus"}]},{"constraint":[{"verb":"contains","field":"title","value":"ARDS"},{"verb":"contains","field":"title","value":"virus"}]},{"constraint":[{"verb":"contains","field":"title","value":"virus"},{"verb":"contains","field":"title","value":"angiotensin-converting enzyme 2"}]},{"constraint":[{"verb":"contains","field":"description","value":"angiotensin-converting enzyme 2"},{"verb":"contains","field":"description","value":"virus"}]},{"constraint":[{"verb":"contains","field":"description","value":"ARDS"},{"verb":"contains","field":"description","value":"virus"}]},{"constraint":[{"verb":"contains","field":"description","value":"soluble ACE2"}]},{"constraint":[{"verb":"contains","field":"description","value":"ACE2"},{"verb":"contains","field":"description","value":"virus"}]},{"constraint":[{"verb":"contains","field":"description","value":"SARS"},{"verb":"contains","field":"description","value":"virus"}]},{"constraint":[{"verb":"contains","field":"description","value":"Orthocoronavirinae"}]},{"constraint":[{"verb":"contains","field":"description","value":"Severe Acute Respiratory Syndrome"}]},{"constraint":[{"verb":"contains","field":"description","value":"Middle East Respiratory Syndrome"}]}]} + + + opendoar____::7e7757b1e12abcb736ab9a754ffb617a + {"criteria":[{"constraint":[{"verb":"contains","field":"description","value":"covid-19"}]},{"constraint":[{"verb":"contains","field":"description","value":"SARS-CoV-2"}]},{"constraint":[{"verb":"contains","field":"description","value":"coronavirus"}]},{"constraint":[{"verb":"contains","field":"title","value":"covid-19"}]},{"constraint":[{"verb":"contains","field":"title","value":"coronavirus"}]},{"constraint":[{"verb":"contains","field":"title","value":"SARS-CoV-2"}]}]} + + + openaire____::5992569aa739819f41881a6fb910faa6 + + + + opendoar____::a03fa30821986dff10fc66647c84c9c3 + 
{"criteria":[{"constraint":[{"verb":"contains","field":"description","value":"mers"}]},{"constraint":[{"verb":"contains","field":"description","value":"sars"}]},{"constraint":[{"verb":"contains","field":"description","value":"covid"}]},{"constraint":[{"verb":"contains","field":"description","value":"pandemic"}]}]} + + + issn___print::ae3b15bfdf009101df89bab7f4cdbf9d + + + + issn___print::8dcb493279954d297336f12cea63b59f + + + + + + covid-19 + + + chicago-covid-19 + + + + covid-19-tx-rct-stats-review + + + + covid_19_senacyt_abc_panama + + + + covid-nma + + + + cd2h-covid + + + + covid-19-centralamerica + + + + {"constraint":[{"criteria":[{"field":"field","verb":"verb","value","value}]}]} + + + + + + + + + + + + + + Capacitive energy storage + Total energy + Energy informatics + Water-energy nexus + Energy charge + Energy efficient algorithms + Energy impact + Wireless energy harvesting + Energy efficient architecture + Fuel efficiency + Energy efficient clustering + Dual energy + Energy demand management + Reduced energy expenditure + Intermittent energy source + Magnetic energy + Energy flow + Variable renewable energy + Water energy + Maximum energy capture + Energy cycle + Energy per instruction + High energy accelerator + Building energy needs + Electric potential energy + Energy Saving Module + energy diversification + Excessive energy + measurement_unit.energy_unit + Energy modeling + Specific energy + Energy structure + Energy reduction + Specific potential energy + Energy tax + Renewable energy storage + Energy consumption + Förster resonance energy transfer + Energy Management + Energy component + Energy transport + Moist static energy + Alternative energy + Energy estimation + Energy development + Renewable energy supply + Maximal energy + Energy harvesting + Net energy + Energy metrics + Smart energy systems + World energy consumption + Clean Energy Project + Energy requirement + Energy resources + Energy leakage + Solar Renewable Energy Certificate + 
Energy Fraction + Energy applications of nanotechnology + Renewable energy in the Philippines + Energy assessment + Sustainable building + bioethanol + Energy harvesting wireless sensor networks + Integrated energy system + Available energy + Random energy model + Acoustic energy + Mechanical energy storage + Energy policy + Energy products + Solar panel + Low energy + sustainable energy action plan + Cooling energy + Specific energy consumption + Electrochemical energy conversion + Renewable thermal energy + CO2 emissions + Battery energy storage + Energy efficient distillation + Dynamic energy budget + High energy irradiation + 100% renewable energy + building energy performance simulation + price of energy + Energy community + Thermal energy recovery + wind energy + German Renewable Energy Sources Act + Leakage energy + Photovoltaic solar energy + Renewable energy investment + Cebu Declaration on East Asian Energy Security + Energy equipartition + SECAP + Fusion energy gain factor + Direct energy conversion + energy market + Isotropic energy + energy efficiency + gross floor area + Energy regeneration + European Union energy policy + Energy harvester + Geothermal energy + Final energy + Thermoelectric energy harvesting + Energy taxis + Energy balance + Renewable energy penetration + Battery energy storage system + tidal power + Renewable energy in the European Union + Tidal current energy + Energy storage + Energy-efficient driving + Energy index + Environmental impact of the energy industry + Energy-plus-house + Waste-to-energy + Energy efficiency gap + Energy load + Energy rate + energy use + Energy management system + Energy users + High/low voltage + Solar energy conversion efficiency + Energy consumption optimization + Energy Security Act + energy regulations + carbon taxes + Energy efficient routing protocol + Gibbs free energy + power industry + waste of energy + District heating network + Solar thermal + photovoltaic + green fuel + heat recovery + 
Mechanical energy + Renewable energy in Argentina + Forms of energy + Surface energy flux + National Appliance Energy Conservation Act + Net energy gain + Transient energy function + Energy modulation + National Renewable Energy Action Plan + Harvesting lightning energy + Minimum total potential energy principle + Sustainable energy + Thermal activation energy + Decreased energy + Net energy value + Field energy + Graph energy + fossil fuel + Energy based + Home energy rating + Energy pricing + Kinetic energy recovery system + Linear energy transfer + Nuclear energy + Energy dose + Wind energy + energy statistics + Greenhouse gas + Energy infrastructure + International Framework for Nuclear Energy Cooperation + Renewable energy consumption + Visible energy + Energy control + greenhouse gas emissions + Domestic energy consumption + Nuclear energy policy + Energy auditing + Energy Tax Act + Operating energy + Wind energy conversion + Bottom-up energy modelling + jel:Q4 + Characteristic energy + Atomic Energy Act + Standard Gibbs free energy of formation + wind power + Energy efficient transmission + Energy independence + Excess energy + local climate action + energy conservation + energy distribution + Energy landscape + Positive energy theorem + Minimum free energy + Ocean thermal energy conversion + Pulse energy density + energy crisis + Energy method + Natural energy + Supercapacitor energy storage + Energy efficient scheduling + Renewable energy law + Solar thermal energy + ethanol + Potential energy surface + Energy management unit + energy sector + Energy productivity + Thermal energy storage system + substitute fuel + Energy aware scheduling + Aquifer thermal energy storage + Energy system design + Energy failure + Solar energy storage + Transportation energy + Clean energy + Energy behavior + Global energy + Rotational energy + Tidal stream energy + Convective available potential energy + Energy drift + emission abatement + Energy conservation measure + 
Renewable energy in China + Flywheel energy storage system + wave energy + Urban building energy modeling + Energy service + Impact energy absorption + sustainable urban mobility plan + Units of energy + Energy hub + Energy technology + Energy gradient + Energy management controller + Primary energy + Magnetic field energy + Boundary energy + Conservation of energy + energy recovery + Energy management + Statistical energy analysis + Energy materials + Energy integration + biomethane + geothermal energy + Embodied energy + Offshore renewable energy + Spectral energy distribution + waste to energy + Energy-Generating Resources + solar heat + Energy budget + Energy sector + energy storage + Atomic energy commission + Energy equivalent + Entropy (energy dispersal) + Joint Dark Energy Mission + Energy security + energy transport + Energy derivative + Secondary Energy Resource + Energy data + Energy sustainability + Energy balance equation + cost of energy + sustainability + National Energy Act + Specific surface energy + Seasonal thermal energy storage + Electric energy consumption + Binding energy + Energy company + Dual energy absorptiometry + Solar energy + Energy economics + Light energy + Energy cannibalism + coal + Energy balancing + Energy management software + energy conversion + Energy supply and demand + Energy transfer + renewable energy source + energy grid + Bio-energy with carbon capture and storage + Energy cost + Energy indicators + Energy trading + Free energy profile + Energy system + Balance energy + Energy Policy Act of 1992 + Energy level + Low-energy house + Energy retention + SEAP/SECAP + Energy performance indicators + Energy conservation in the United States + European Union energy label + High Energy Density Matter + Kinetic energy operator + Wind turbine + Zero-energy universe + Climate Change and Sustainable Energy Act 2006 + Energy forecasting + Energy transfer rate + Critical energy + Energy feedback + biogas + Energy density + Energy 
recovery + EU energy strategy + power sector + European Energy Charter + Gravitational energy + energy saving + Energy rate density + Energy supplementation + Energy characterization + Energy deficit + Energy information + Home energy monitor + Renewable energy system + Energy storage efficiency + Mass energy + Energy hierarchy + Energy margin + Energy storage sizing + Deep energy retrofit + Electric energy storage + Specific energy absorption + energy site + Electron kinetic energy + Bioenergy village + Energy amplifier + PV + Energy function minimization + Lower energy + Energy performance + Energy rationing + Power2gas + Energy collision + Energy scheduling + Minimum energy control + Energy loss + climate finance data + energy supply + Power2X + Heating energy + Wave energy converter + Bioenergy + Strain energy density function + Energy measure + External energy + Grid energy storage + Compressed air energy storage + Sound energy density + Renewable energy policy + heat demand estimation + Energy spectrum + Energy factor + Energy subsidies + alternative energy + Energy cascade + Energy accounting + building efficiency + Building energy + Energy distribution + Energy law + Energy returned on energy invested + SEAP + Energy consumption wireless sensor networks + solar radiation + Energy plan + electrical supply + Internal energy + Thermodynamic free energy + green energy + Seasonal energy efficiency ratio + Renewable energy + Gibbs energy minimization + Powerplants + Specific mechanical energy + Electric energy + Heat energy + Energy density functional + Thermoelectric energy conversion + energy research + carbon tax + Energy migration + Power lines + Renewable energy technology + Bioelectric Energy Sources + Dual Work Exchanger Energy Recovery + Energy allocation + Solar hydrogen energy cycle + Wireless sensor networks energy harvesting + Energy preservation + Zero-energy building + High-Energy Shock Waves + Energy intensity + Energy supply chain + Negative 
energy + World energy resources + Energy supply + Primary Energy Resources + Energy conservation + Operational energy + High kinetic energy + Distributed energy efficient clustering + Energy Slave + Energy system optimization + Electron energy loss spectroscopy + Primary energy consumption + EU energy policy + biomass + Excess heat + Vibrational energy + bioenergy + energy transition + Energy awareness + Energy simulation + Energy source + Energy converter + Energy transfer upconversion + Radiant energy + Renewable energy sources + High energy + Conservation energy + Energy demand + Hybrid energy + Minimal free energy + low-carbon energy + Surface energy balance + Energy absorbing + Energy exchange + non-renewable energy + Thermal energy harvesting + Energy dispatch + Wireless sensor networks energy conservation + Ionization energy + Energy conversion devices + Energy carrier + Positive energy + Energy particle + Activation energy + energy consumption + Potential energy + Surface energy transfer + Energy analyser + Normalized energy deviation + Energy pathways + Energy inequality + Energy efficient transformer + National Energy Modeling System + Stored energy + Renewable energy commercialization + Energy efficient computation + Intermediate energy + Energy operator + Aircraft specific energy + Heat storage + Energy interval + Low energy level + Efficient energy use + Zeeman energy + Energy absorption + Residential energy + Energy efficiency implementation + Energy metering + Turbulent energy dissipation + Marine energy + Renewable energy in Brazil + Energy consumption minimization + Threshold energy + Nonlinear energy operator + Worldwide energy supply + Energy and environmental engineering + Photovoltaic + Energy variation + Helmholtz free energy + sustainable energy and climate action plan + Photovoltaic energy conversion + Electrochemical energy storage + Turbulence kinetic energy + Energy distance + Energy shaping + Kinetic energy + United States energy 
independence + Renewable energy credit + Hydro energy + Secondary energy + Adsorption energy + energy legislation + Energy expenditure + Energy mix + Specific kinetic energy + Energy policy of the United States + Urban energy Planning + Bloom Energy + nuclear energy + Energy Policy + Renewable energy in Denmark + jel:Q - Agricultural and Natural Resource Economics • Environmental and Ecological Economics/Q.Q4 - Energy/Q.Q4.Q41 - Demand and Supply • Prices + State of energy + energy industry + Energy efficient lighting + Energy delivery + Energy statistics + Energy monitoring and targeting + energy requirements + Accumulator (energy) + World Energy Outlook + Energy dynamics + Cyber physical energy systems + Renewable Energy Certificate + Decarbonization + Thermal energy storage + thermal energy + clean energy + Energy minimization + Food energy + Energy deficiency + Energy reserves + Energy flow analysis + Energy plant + energy needs + Building energy simulation + sustainable energy + Energy sharing + Energy in the United States + biodiesel + Energy analysis + energy audit + biofuel + tidal energy + Energy planning + Battery energy + Total energy expenditure + Thermal energy + United States Department of Energy International Energy Storage Database + Energy shift + energy law + energy retrofit + energy network + Translational energy + Energy engineering + Recommended energy intake + Regenerative energy + Energy Planning + Energy-coupling factor transporter + Energy conservation training + Energy absorbers + Effective energy + Energy investment + Mandatory renewable energy target + Impact energy + solar energy + Energy Policy Act of 2005 + Energy liberalisation + Energy regulation + Energy matching + Specific orbital energy + energy union + European Green Deal + mesh:Energy Transfer* + Solar Energy Generating Systems + Thermal Energy Evaluation + Potential energy landscape + Low-energy transfer + Energy quality + Storage energy + Energy Policy and Conservation Act + 
Energy efficient routing + Plasma energy + heating and cooling planning + Weighted energy + PV system + PVGIS + Wind energy penetration + Core energy + Sustainable Energy Utility + Energy market + Chemical energy + Nuclear power + Abnormal energy expenditure + Hydropower + Energy efficient resource allocation + energy dependence + House Energy Rating + Waste-to-energy plant + Electrical energy consumption + Superconducting magnetic energy storage + Minimum ignition energy + Ambient energy + Energy policy of the European Union + Specific energy density + Energy transition + Earth's energy budget + Overall Energy Level + Energy poverty + Residual energy + jel:Q47 + Energy transformation + Reaction energy + European Union energy strategy + energy technology + jel:Q48 + energy production + Building energy analysis + Energy conservation behaviour + jel:Q43 + jel:Q42 + jel:Q41 + Photovoltaics + climate change + jel:Q40 + Flywheel energy storage + Energy profile + Energy policy of India + Life-cycle greenhouse-gas emissions of energy sources + renewable energy resource + Energy maximization + Energy recycling + fuel cell + sump + Elastic energy + Electrical energy storage + Sound energy flux + Radio frequency energy + Creative Energy Homes + Latent internal energy + Cryogenic energy storage + Radiant energy density + Energy conversion efficiency + Global Energy and Water Cycle Experiment + Energy agency + Vegetable oils as alternative energy + Energy conservation in the United Kingdom + Energy consulting + Reactive energy + EU energy label + Triple energy window + smart grids + UBEM + Renewable energy debate + Business Energy Investment Tax Credit + Missing energy + energy management + Clean Energy Trends + Energy constraint + Energy efficiency in transportation + Energy flux + Energy Independence and Security Act of 2007 + Wireless sensor networks energy consumption + Energy depletion + Energy diagram + energy policy + Energy education + Principle of minimum energy + 
Solar energy harvesting + Energy equation + Energy facilities + Solar energy conversion + Energy constrained + nuclear power + Energy value of coal + energy demand + Energy strategy + + + + doajarticles::378f23393289afc1485dbf50951c83bc + + + + issn___print::794931c2e2a944f232b7be048a992b7e + + + + doajarticles::4ddab50f556e87f404a9c41771b21479 + + + + issn___print::4f3d67374523bd3cf9b4dc4d2fba0672 + + + + issn___print::a1d71ded7373fb0b17d3196e6648290d + + + + issn___print::3009447f36554f46207c65c6cd4d000e + + + + issn___print::f40e59279254118b8544abbd1562e6f8 + + + + issn___print::734dc9d0e7999dfd18a7a9235dc78ffa + + + + issn___print::3bc84c65a7d16107f92244ebea08e4f9 + + + + issn___print::20ff108cff2d59f8653d07141080e6ba + + + + issn___print::f131339240ced03aeff0d31e8643d108 + + + + issn___print::0433c40c181250e613a006798787b334 + + + + issn___print::f311391ea17e0fa88440973e96ce5e4e + + + + issn___print::a5dfd3cdcc1765e4c70f991726ac519e + + + + doajarticles::4c271c9709234a3e7f4fc8a8c812cf71 + + + + issn___print::dddc76f0b0250dda4d77b4bed5f56d8e + + + + issn___print::55d5cd7ed8dc9ef5c11d3a3112cba74e + + + + issn___print::707c5a454e5578f5c2d2304b87585870 + + + + issn___print::5302a312bead62b4e2fa914414ca5f7a + + + + issn___print::ae3fd2a3f41c973b9399d5efe27a8903 + + + + doajarticles::6112fa00eb096757ee4ba4b09b781afa + + + + issn___print::f5ef2bab7971bf15d253f9a4bbe68edf + + + + issn___print::8f7a02b5ac5b9332fc594021e5a8ee72 + + + + issn___print::ea183a877f51df643ebb3305790f920f + + + + issn___print::a4057017e6ae6a2781ac080d833e7a71 + + + + issn___print::65d14890631b4ca90562e9b3615ab8a7 + + + + issn__online::ad1e1198fc25091286d16344572d39ad + + + + issn___print::39576b4c76a915e71e6b5d50ef858202 + + + + issn___print::7fae073a8c08f94615666cc1189b5619 + + + + issn___print::cbf922bd81163e87b0299d820ff04c68 + + + + issn___print::b68729533f73b9381026fdd14f2b59ce + + + + issn___print::c757da83f7a2e20f959fd2548cd374d8 + + + + issn___print::3cbc82f7988a09373a4af62661301d86 + 
+ + + doajarticles::2fd197d7b76958a30f7629c628c198eb + + + + issn___print::2a802416595959a8c42a64a340a10a8b + + + + issn___print::10adbb6219626bcac6a7184a35ca610e + + + + issn___print::87c44cf55137e275bc8cfc2b401d712e + + + + doajarticles::4a953db632cb88557a5fa7001ede9366 + + + + doajarticles::770bcb69b29a456276deaab74370b38a + + + + doajarticles::e8cfe9ddf73f0bc47382651e47d402db + + + + doajarticles::588d3e297135b04fa55b7d4b7e63f293 + + + + issn___print::6692d48a9483d39eb00ea58ad4597ffd + + + + issn___print::5b7c3922839ccc57dd188c6beba4c81f + + + + doajarticles::2d249ee2a411ca1e7a279ed653f458ec + + + + doajarticles::977b478b26b90576200354fc469ba5b5 + + + + issn___print::0f30459db45ba1ac1494d356569b06fb + + + + doajarticles::0c64460df9954f5c37fa47266537e798 + + + + doajarticles::9383bdade5a94867881e9d98787469c1 + + + + issn___print::d9f859569d45a8321ff9c0907da24ad6 + + + + issn___print::0e5b45589090fc6b4d77e4d8e65ea744 + + + + doajarticles::1a4aa2b8e7809faf59a093f995ff5a82 + + + + copernicuspu::8e2e4026271bad083cc89686372b2a8d + + + + issn__online::b263a67ae761555fc70d90836abc2a8d + + + + issn___print::32431f247401d2f88ad0759f6d1c63f1 + + + + doajarticles::fc3a4a0ad9276fda3a214cd02a066625 + + + + doajarticles::9522fef66cf1a56de16f3e8318407611 + + + + doajarticles::4a953db632cb88557a5fa7001ede9366 + + + + doajarticles::588d3e297135b04fa55b7d4b7e63f293 + + + + issn___print::12ac046ec8c15629dc2b0810008613f3 + + + + issn___print::6e17d958aa518b30e0652ddf152f968b + + + + issn___print::fd1162ed87cb335b3d5eae943d0628dd + + + + issn___print::bd751f82aa675ad78b7562eb3a2e1f35 + + + + issn__online::84486f003e22f1fac66278cd8952938a + + + + doajarticles::b7dc652e21b36af079b53757691a6ca3 + + + + issn___print::6952d8022c8e9ec9f28a24e6d85acf19 + + + + issn___print::6952d8022c8e9ec9f28a24e6d85acf19 + + + + issn___print::7d15320423e2b5ed4801a4e1bcaae4dd + + + + issn__online::c8d63ee31388444322f445a6e89e59ec + + + + issn___print::dde724dc5252fa5c6e8200a029cccad4 + + + + 
issn___print::c99ce8ba549c4913378e385ba052cc7a + + + + doajarticles::af6f6a030aeb9a5d1c6195a78468326c + + + + doajarticles::231dbb771e41d40fb6b91fa0af500e15 + + + + doajarticles::16658d94a8c89cc2de136a0c30dd4def + + + + doajarticles::331251e4f5658b99ca627cd24bb77d44 + + + + doajarticles::0a2af36b18a10daa66e0b73b55979d69 + + + + doajarticles::c9ad3a2c992c8d8103b94005c1468bc4 + + + + doajarticles::49800a108346cfdb181cb2538815a215 + + + + doajarticles::f9fc16b1bf50973c0199e41af447fa47 + + + + doajarticles::833c821df765a78f000a771fbda16689 + + + + doajarticles::25557f1fa59bf7f45023ed5cda6df7d3 + + + + doajarticles::73792ee5f8e7261a56758942b94a4d50 + + + + doajarticles::b857adcf9f4e0115d43f63f88e4d678b + + + + issn___print::ed3f742b855d4a14483a269a42a4ec1a + + + + issn___print::d372096053db20adbf14023d422b8030 + + + + issn___print::f98cc04c0bfc1432e6d93fad91267b09 + + + + issn___print::3cca5d1e8d44126cd469a8bc5c3bd333 + + + + doajarticles::9bc8218a1bc1678a8c1a73862b12fe0c + + + + doajarticles::bfd3191c8fae0a88d32521892c8651e0 + + + + issn___print::e3e9eade2907c7cfa1b3cafd838883fb + + + + issn___print::5d82328f0122a7fabd4330a1e14e0f64 + + + + issn___print::24c3f8a2279b3d0b430618ac78f7f2ac + + + + issn__online::6546671a5c9a7b6411283fd57e039685 + + + + issn___print::34f3d766efd56c71453a66a050e997d0 + + + + issn___print::017e6ef3fe7d88724647f4d3246cbc65 + + + + issn___print::3e6ff3a23f843d6d1549f170c2c46972 + + + + issn___print::cbfc2b5d7b9a611b46a9ef75829d2a16 + + + + issn___print::c7c8c62925a18ccd1bd015c879523310 + + + + issn__online::46d9370aed9253c69ffe06c68ea65356 + + + + issn___print::d2af1baa408b67ddb0a144af9b3ee54b + + + + doajarticles::3d4e46c7afb9b633c89a4a6e04257c21 + + + + issn__online::db9d9c777170ba862898687da15b6f29 + + + + issn___print::0d8adb8fd67d14dbea8f6624e94c1aa5 + + + + issn___print::b1a8b119646d5707b8d698d5623488a8 + + + + issn__online::1035ba503d08dfebe72237f85666063e + + + + doajarticles::0db98bbf059a52fa5a186d6be1926254 + + + + 
issn__online::9660682282432dcb5fb0740c8689d360 + + + + issn___print::86ad3a4f2db1739d529ed0449256f0da + + + + issn__online::f9563fbac77fd2606d9f8d9e4c7f54b4 + + + + issn___print::637c479c49692a2a7d20e507b573ef65 + + + + doajarticles::12028c7a20214a8fe089f6ce43bcc672 + + + + issn___print::175614e57be1e97bc85bb62d35a7817c + + + + doajarticles::5117b0f68c38ec4c9bb29236dbde4427 + + + + issn___print::c48b8ae541e158d805902e7d5226be7e + + + + doajarticles::bfd3191c8fae0a88d32521892c8651e0 + + + + openaire____::991a5beae43cd80ee579843834857837 + + + + issn___print::f658aedd001a34c147068b44bf22e3d8 + + + + issn___print::b6d4b7ac7b55734e304b6839399d3a39 + + + + issn___print::114113dad4c775f0a8de2cecca02170a + + + + issn___print::1d3f43512c48f0c917ac2ea2704e8164 + + + + issn___print::693cd4e37de2c2191193bc3e8e2faa67 + + + + issn___print::0b4dd23f592ab65e3e4210876aa5a766 + + + + + + wind_energy + + + + water-energy_challenges + + + + oswec + + + + jet + + + + amadeus-737054 + + + + hit2gap + + + + energy_data + + + + sedinstcjre + + + + res + + + + brcorp2 + + + + temoa + + + + cabriss + + + + ensystra + + + + greensoul + + + + photovoltaic + + + + greco-787289 + + + + sunrise + + + + smartchp + + + + gunam-odak + + + + a-leaf + + + + friendship-eu-project + + + + astep + + + + wesc2019 + + + + wakebench + + + + iec_61400-15 + + + + ieawindtask32 + + + + iea_wind_task_43 + + + + setwind + + + + eerajpwind + + + + geogrsure-msu-labres + + + + vn-energy-system + + + + flexi-sync + + + + s3c + + + + h2020-greencharge + + + + res4build-project + + + + epanacea-project + + + + drop-it + + + + superpv + + + + mosaic-csp-h2020 + + + + next-csp + + + + solartwins + + + + polyphem + + + + powertac + + + + solarpanel + + + + chpm2030 + + + + selfie + + + + reflect-project + + + + geofit + + + + orc-plus + + + + geothermal-innovations + + + + + + + + + + Forest composition + Digitilization + unmanned vehicle + ALS + Digital Transformation + Brazil-nut forests + Biomass + Digitilization 
methods + Digital forest + Canopy dynamics + soil nutrients + Bartlett Experimental Forest + Internet of things + Forest-environmental policy + Aerial images + Aero-triangulation + digitization + optimal resource management + Environmental modeling + Beech + Biomass modelling + Forest height + Forest inventory + Forest Cover + angular sampling + Canopy openings + Forest classification + Classification and Regression Trees (CART) + Digital soil mapping + Airborne laser scanning (ALS) + Forest land register + remote sensing + Canopy height + Canopy gap + Deciduous forest phenology + digitisation + canopy radiative transfer + Geographic information system + Biomass estimation + Forest operations + Dendrometry + Change detection + Forest landscape + robotics + digital terrain data + Canopy cover imagery + rural area + weed control + digitilisation + Forested area + agriculture + ALOS + precision agriculture + AutoCAD Map + Canopy structure + Digital aerial photogrammetry (DAP) + Ecosystem services + Forest phenology + Advanced Land Observing Satellite (ALOS) + precision farming + weed detection + ArcGIS + Ecological complexity + Atmospheric correction + Forest Road Management + Digital Farming + forest canopy density + Canopy greenness + Floodplain forest + Chain homotopy + virtual agriculture system + Digital forest eco-station + Canopy height model + C-fixation + soil analysis + Fire detection + Forest density + Forest inventory data + digitilized forestry + Forest reserves + forestry + Color infrared photography + Leaf area index + + + + doajarticles::5ee3d790a128c78123fa09c0253dce88 + + + + doajarticles::5aaf3ad8c1e030c358fca3aa4ac549e5 + + + + + + egene3 + + + + solace + + + + pa17 + + + + smartakis + + + + phenology_camera + + + + desira + + + + diverfarming + + + + + + + + + + + + + + + + + + + + rest________::b8e502674c3c3499d5374e9b2ea6d8d5 + {"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"description","value":"Galaxy"}]}]} + + + + + 
galaxy-training + + + + + + + + + + + + + + + + + + + + + + + + + + + + doajarticles::a33380c4a2f88a95b33dfa3dce5b6e36 + {"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"description","value":"IPERION HS"}]}]} + + + doajarticles::31ead5598158eb984a066a2c02f73def + {"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"description","value":"IPERION HS"}]}]} + + + issn___print::c7bb369a18cbba7542bafaf9cefe158d + {"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"description","value":"IPERION HS"}]}]} + + + doajarticles::5c079f7714af5449e0bcfc45b26e24c6 + {"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"description","value":"IPERION HS"}]}]} + + + issn___print::a3a89a312094156d8c5cad74dce0e179 + {"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"description","value":"IPERION HS"}]}]} + + + issn___print::f1929e666e12d044929f6fc0584d7afc + {"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"description","value":"IPERION HS"}]}]} + + + issn__online::57c54e53351c958c789b2492ef1f07f0 + {"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"description","value":"IPERION HS"}]}]} + + + issn___print::ded98f749c3f184f163a2625a72fce24 + {"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"description","value":"IPERION HS"}]}]} + + + issn__online::1b36744601154b071569d18229c837e6 + {"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"description","value":"IPERION HS"}]}]} + + + doajarticles::3d2c5259d756c2543239586527e4597e + {"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"description","value":"IPERION HS"}]}]} + + + fairsharing_::c46482dd5d39742f0bfd417b492d0e8e + {"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"description","value":"IPERION HS"}]}]} + + + driver______::d90775d3c9c1f9069b98af3df0f2349d + 
{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"description","value":"IPERION HS"}]}]} + + + + + 871034 + + + + + + + + {"criteria":[{"constraint":[{"verb":"equals_caseinsensitive","field":"subject","value":"North America"},{"verb":"starts_with","field":"fos","value":"05"}]},{"constraint":[{"verb":"equals_caseinsensitive","field":"subject","value":"North America"},{"verb":"starts_with","field":"fos","value":"06"}]},{"constraint":[{"verb":"equals_caseinsensitive","field":"subject","value":"Mexico"},{"verb":"equals_caseinsensitive","field":"subject","value":"United States"},{"verb":"equals_caseinsensitive","field":"subject","value":"Canada"},{"verb":"starts_with","field":"fos","value":"05"}]},{"constraint":[{"verb":"equals_caseinsensitive","field":"subject","value":"Mexico"},{"verb":"equals_caseinsensitive","field":"subject","value":"United States"},{"verb":"equals_caseinsensitive","field":"subject","value":"Canada"},{"verb":"starts_with","field":"fos","value":"06"}]}]} + + Franklin + Gabrielle Roy + Canadian history + order international + religious freedom in the United States + Subnational Dynamics in North America + T-MEC + canadian + black nationalism + american + transnational organized crime + chicana poetry + asylum + american political science + visitas presidenciales de Estados Unidos a México + Mexico-United States border studies + American culture + Canadian cultural industries + Cannabis in the United States + United Nations’s Sustainable Development Goals + international migration + DREAM Act + American politics + United States and Canada and + immigration diplomacy in the United States + northern border + fundamentalisms + Mexico-U.S + canadian conservatism + Seasonal Agricultural Worker Program (SAWP) + night studies + Latino studies + Monroe doctrine + Homelessness in Canada + Cannabis in Canada + Donald Trump + bilateral relations and mexico and canada + American political science + American indigenous people + American 
politicanl science + immigration reform + chicano movie + US politics + Justin Trudeau + nocturnality + Law and Migrations + Mexican-Americans + Obama + American Propaganda + Jefferson + Joe Biden + nearshoring + bilateral relations mexico and united states + David Thoreau + Us -Mexico transboundary + Canadian public policy + social responsability in Canada + return migration + Science Policy Network from Latin America + americanism + chicano writers + mexican american + section 232 + IEEPA + ACAAN + MIKTA + DACA + cannabis workforce in Canada + American history + Canadian economics + Asylum system in the United States + migrant hospitality + financial geography + North America + immigration diplomacy in north america + Mexican cultural industries + Canada-Mexico relatios + Mexicans in Canada + Unites Nations Organizations + East Los Angeles + Canadian foreig policy + elections in the United States + critical sociocybernetics + Indigenous people in Mexico + IEA + American cultural industries + United States legislative agenda + american writers + pachucos + American sociology + U.S. elections + BlackLivesMatter + Indigenous peoples in Canada + Texas + Latino vote + American republicanism + American studies + US New Republic + cross-border studies + North American trade + Alanis Obomsawin + paradiplomacy + Mexico's national security agenda + Women in politics–United States + social responsability in the United States + U. S. 
Fed Rate + NAFTA + international cooperation + TLC + mexicanamerican + Kikapoo + third generation + american and mexican + Asylum system in Canada + NAFTA renegotiation + Border Cinema + immigration diplomacy in Mexico + USMCA + Cannabis in Mexico + Indigenous people in the United States + Mexico and United States and Canada + immigration diplomacy in Canada + Canadian documentary + african americans + Canadian political sciences + Obamacare + North American cultural industries + poesía chicana + Henry Thoreau + transbordern ethnic groups + United States-Mexico relations + armed forces in Mexico + cannabis workforce in the United States + Mexico-U.S. relations + Stephen Harper + Tijuana + bilateral relations canada and unites states + Biden administration + chicanos + TLCAN + Margaret Atwood + + + + opendoar____::1f187c8bc462403c4646ab271007edf4 + + + + issn___print::c81d1f92a13f46699b2706637518cd9d + + + + doajarticles::9e96ff35e9133ac652729510d4f458ae + + + + issn__online::dc9d3d191fa53003ad74da96d7c54081 + + + + issn___print::2a8223a02fe37659f1f41a936c58ba22 + + + + opendoar____::4fe5149039b52765bde64beb9f674940 + + + + issn___print::7963590588867473a2a288e7a8128a95 + + + + opendoar____::6855456e2fe46a9d49d3d3af4f57443d + + + + doajarticles::4169cb398ae09bf655681e3d63b85626 + + + + opendoar____::df438e5206f31600e6ae4af72f2725f1 + + + + doajarticles::e6e06c5ebc78077c8c9216a9dd339012 + + + + doajarticles::667fa8e87e3227492a459d40d7c3cbf8 + + + + doajarticles::8ceb968ff21b816cf07d226e62215990 + + + + doajarticles::73c5a0fc8bb4e2c403d34c24d19abd0c + + + + doajarticles::cc8c9387487bf0a9e96b61471c8c1674 + + + + re3data_____::5344264969b5c22913258ed1472940b7 + + + + issn___print::59c809b5d64c5c9d90887bb2432631e7 + + + + issn___print::04ffbf14e41d8291490e6fabd8ba25a5 + + + + issn___print::b3f6915e2a42405432018c218e2bc046 + + + + issn___print::cbddedbee3f3125938475200d5c33ac4 + + + + issn___print::96284ccfa45a84adc6bbb53c93f60c6d + + + + 
issn___print::f81291a280159d7849f0b61c0b7b0a27 + + + + issn___print::9cfa23b1eaa9896ea47d3519677b83b4 + + + + issn___print::514dfefae9831c3162a726d513331407 + + + + issn___print::d3e8755e33cf8ebd641745bfbe31eb80 + + + + issn___print::d5666ff1d3049974db0f609c1d74f736 + + + + issn___print::71c973dede04b0bffe227a3ca03cc5bf + + + + tubitakulakb::1ab7ee9ef829b9639b04526fbdb67a0e + + + + issn___print::de72ede8f67d5994fdb7060a3cda653e + + + + issn___print::3d336235ea23bf401dc92652b5f6c7a6 + + + + issn__online::8daebf70fb06ae3ed8f7383b9ba60fb8 + + + + issn___print::4c33b45c2a9ca6f2cfc4658d53523d8b + + + + issn___print::71fb26a669071c88ccc12bbc63bae902 + + + + issn___print::cdfec9183456fd7e6b9548249db1e379 + + + + issn___print::653866dc426bbb54434b782984923309 + + + + issn__online::aac9e676c34557de2e080fb461adca0e + + + + doajarticles::1cc79af440f0bba958fa35457a72baae + + + + issn__online::f84153234b72a096c2fc4bacc113453b + + + + issn___print::be3e12e8b0c99b809ae9cbdf7947aa53 + + + + issn___print::be3e12e8b0c99b809ae9cbdf7947aa53 + + + + issn___print::047a33fd18c9c7dbdde67f23287d2584 + + + + issn___print::4c6e60d44b493f1ea39671953bf57329 + + + + issn___print::8f6cf08aa643f71c14f58d92d37cf683 + + + + issn___print::9eaf704faab839cb43f38865979d031f + + + + issn___print::e14c88ad57d8426462a83c4b2c7a3d40 + + + + issn___print::1ef7c3685ce95f9ce0c9face671c1951 + + + + doajarticles::d2d1ce3a698df45486cbaa82aadbafcb + + + + doajarticles::054c07862a1425b07ea9c081bafa3985 + + + + doajarticles::6e2713a56e3dbe9191b4c2e04fdf599d + + + + doajarticles::66a9f6b27c0b0615896a0260fbdf3802 + + + + doajarticles::c3ec1beabcac28888d8d8fb1c0280f79 + + + + doajarticles::e3fe4e335bdf842807cddce5cc703b3e + + + + doajarticles::ef2c5dea0942cb1bfd5a57f36f637172 + + + + doajarticles::979c00c48a5ab319c27e2c2810520c5c + + + + doajarticles::37edd95a65636941dc48b8cb9b5a23c2 + + + + issn___print::72526861bd8daeaec41489d261df2b57 + + + + issn___print::89ac5f035df502a919d6481ea0f33df7 + + + + 
issn___print::33732d354f733342d78c88bcf88b8901 + + + + doajarticles::94fecc336b91c2f331b51740c8b42b50 + + + + issn___print::456da728c4210e7cfc9f7d390cd977c4 + + + + issn___print::e7bb39dc0ee73a76bee9f5de850c60c9 + + + + issn___print::fd1daded9e0aec3a6ac0ee7c41920775 + + + + issn___print::d4c451ae1c6f53814d3f31cc87a483f2 + + + + issn___print::84fdb48273b057287632a2716ebf0f40 + + + + issn___print::6b366c0dc2610d8e7e4350a0478fdd93 + + + + issn___print::2872324f81bd594a2e58c70960c87997 + + + + issn___print::abb513951560ba217896b3c3a11d8b9d + + + + doajarticles::b38923a0d8506ce7437162b3c3b99563 + + + + issn___print::6078b5609585f38bdcb679b84d4492f7 + + + + doajarticles::60b38bb04ec29b11a49e7b09561f139b + + + + issn___print::d90393e6784b81ac99b8b452dc6c6975 + + + + issn___print::d5ebe5e2efb6d4bd8000133a23c7c827 + + + + issn___print::478e407e430c267fd194a371a4b1cfbd + + + + issn___print::2c057b67956171db90e8383022823fd3 + + + + doajarticles::495a79be381509d144f7d4a4c92045c2 + + + + doajarticles::a0daf7d82967289b3a714ef0ec5d5ec8 + + + + issn___print::8795129abef8a4171a31172b82a5a154 + + + + doajarticles::f2aa57067b762baf9afd073783a07dff + + + + issn___print::44fbe50e48676c8d1425184cc746c735 + + + + issn___print::03715da1f69598f16d81fd205db4137b + + + + issn___print::28df3da83c03afe50979c2b718c1f031 + + + + issn___print::8668f3ec3f9ec98ec219ab5b915ce678 + + + + issn__online::d74a3b16f0f0c0946fcf6dde03382511 + + + + re3data_____::41f4ba2e210456c34bb26379d31ccffa + + + + re3data_____::ab5614004545931e59b747b4d3e01c35 + + + + re3data_____::f4bd51a04130e202ba61e46e485b0ff2 + + + + re3data_____::466f33717784c3be75c0ace1b4ae5c27 + + + + issn___print::f8d4bd706f820783eb6c896c879129ca + + + + issn__online::e359aa89935a92ef8e53d619c3935ad2 + + + + issn__online::f0ede569837b192ff090564f3f62fc95 + + + + issn___print::7d75a1706f95cd0af199f9ce148cafb1 + + + + doajarticles::947924312d932d2b7bab2a9cd153d24f + + + + issn___print::fd0a88805302f01ebfbaffc4a545acbc + + + + 
issn___print::7fce3dcfa53c60ad5cd043bf6a300359 + + + + issn___print::0d7dd9cfa89dc0f942b1faee95b3fc91 + + + + doajarticles::7215b846a137c298fc4c49049eae0a71 + + + + issn___print::f1da156437488bfb87ba28070923fb40 + + + + issn___print::47407663ad0e1d2923061e4a310e8378 + + + + issn___print::4429459053c2de5d0b71f8292735adf6 + + + + issn___print::6529eee69518785d190ccb2da5bef1c4 + + + + issn___print::5302a312bead62b4e2fa914414ca5f7a + + + + doajarticles::ecbebb577e63b6828e2f45e425709709 + + + + doajarticles::6b4754031be51e96afde788f9ab73b55 + + + + issn___print::1d3f43512c48f0c917ac2ea2704e8164 + + + + issn___print::92dd258c61077f12a507336d81d01c43 + + + + fairsharing_::582967e09f1b30ca2539968da0a174fa + + + + re3data_____::a5a47ec6d164c349a85343941a3c8df0 + + + + issn___print::2a17544dfe84de7372691e63e4060d1f + + + + issn___print::353e8c2c5c0d1d685f6840448caa0f22 + + + + issn___print::5185aeee99b5034543585e2df1b8aa55 + + + + issn___print::1f22adb40cf9ddda811f3dc94292124f + + + + issn___print::d911bdaf1ec51773dc299690d4d68668 + + + + issn___print::cd5de33eca81dcfcdbccc93131882f33 + + + + issn___print::ae1184d94e4d05628b07f58c41299217 + + + + doajarticles::2f9e138d3533252d0f927fb4c18e7ac7 + + + + doajarticles::4de11cf8024a72b61fe40a8d42233260 + + + + issn___print::0baff25ba18e72569586d9bb3d0d94f6 + + + + issn___print::89a3c620da346604515bff5eeadd4373 + + + + issn___print::76a4f76a7b6edb734a2feffbf569c312 + + + + issn___print::d640f84798f12e7b5cbdfb116e7016e9 + + + + issn___print::5ad6d5fae485d52279363a40b1a74b33 + + + + issn___print::9613ce105702ba484673ff768538887b + + + + issn___print::6e85e9494dbb6383174c25f890e76f6a + + + + issn___print::e084ba197764f259e12007692bcab2a0 + + + + issn___print::1c4febc7278f33506bc5c53415f6df94 + + + + issn___print::1648ec387ad0c64cedecc07c9efe8a69 + + + + issn___print::a1d39d2cf6306e0e47a30e539ac078ba + + + + issn___print::6f352f8d35638ebc7f9374450701c258 + + + + issn___print::d095a26b9039b91de446f1ca30b52fb8 + + + + 
issn___print::dde9939a66c680250b50e6167069f50e + + + + issn__online::8dd7b83b8e0de9c808185b47d60690bb + + + + doajarticles::81f495a74205fa1828ef69c73024d745 + + + + openaire____::3db634fc5446f389d0b826ea400a5da6 + + + + issn___print::26fb8ee35b0f016107407199f7caab8d + + + + issn___print::74fdbacf680dc2f894aff1f4aa86a033 + + + + issn__online::4074709fc410b3be61ea1769634ae8ff + + + + issn___print::528df3ad854f35905888312e67ab289c + + + + doajarticles::e0291a282bb60fbd7f1ed66eddd2dcd8 + + + + doajarticles::b20f009c8f772e5dbbfab8ef6112054a + + + + issn___print::59f48c33cbd0709bded62e11fb7fe7a6 + + + + doajarticles::e645826d355b95b452670a396ed5f1b6 + + + + doajarticles::26d1013e02712d468ffc673e973a94a4 + + + + issn___print::ff3375dbae7402eed827de6f9bc1dae7 + + + + doajarticles::8eb6a8ea9965381966de4e26b6df871d + + + + issn___print::6109e0b510a6f80b7f23ae2e9128bdb5 + + + + issn___print::4a286da974a11ee41bec3530c3087fe9 + + + + issn___print::89fa6b45b969f7a47000b6f4825cc317 + + + + issn___print::02c4ee5a5f50d46270d8a3c661751acc + + + + eurocrisdris::2c3c70ce4b8fac2cb2a644c8b09d6541 + + + + issn___print::01369779af4b50c35e0253a392a29f87 + + + + issn___print::c00b835ccece66bf64d18f45b4b35299 + + + + issn___print::77368c4ff3b22b665fd0f3d186cd34a0 + + + + issn___print::424781735140fd8adae8ac53cbeb46ce + + + + issn___print::b757ff8a198e2536db54d7660a75147f + + + + issn__online::fbf71200b9dc415cc77036429a39d415 + + + + issn___print::19cdd04d0918bc0b2e05854d4924d722 + + + + issn___print::b6aad0b1eb044e0a5d0f46743ed30412 + + + + issn___print::4960d8d70c93d5ec0ada530e783b7ccc + + + + issn___print::ef9470623c47db1489374825451be76a + + + + issn___print::268f6edcb8f52aff568ef5bf454a396b + + + + doajarticles::a3834c6b341698561729771fdb264ba8 + + + + doajarticles::10960937f086496a32324c9eeb1cf380 + + + + doajarticles::268f6edcb8f52aff568ef5bf454a396b + + + + doajarticles::8436515900e91b3177adbcd464b7a4fa + + + + doajarticles::0ebea7e81ff7eb8b49f5855869bf5510 + + + + 
issn___print::32b2935535a795cad2ac4d97ede8c9aa + + + + issn___print::c29800a010f18cc1cbfe5dfa8aff0101 + + + + issn___print::5cd28af4e630273b0f388fecee1642f2 + + + + issn__online::462ee7d3bfd901f543cc26dfd7b3f017 + + + + issn__online::3a1e9abdc733c2b4231e8d96b6eccb78 + + + + issn___print::ed0abeccd797edd7a4b88450e8834c7b + + + + issn___print::f733506374a5bb0ec8758f790d365bdd + + + + issn__online::bef3388f967015ba2a32c01c11872fec + + + + issn___print::a023d120ee80e5276337d6248f495e43 + + + + issn___print::d0cc53ffc712679bebc0500ed13ed2e9 + + + + doajarticles::154dc946bcecfd68e883ecd325e7a52e + + + + issn___print::806d0032b4709e91453b483347226887 + + + + doajarticles::a9b551c31e21ef34170ce17a8f0e7835 + + + + doajarticles::06ba084aa9240cb03d24c47c9c0652b1 + + + + issn___print::909bfe752bee4560f16ad0f8d5afdc89 + + + + doajarticles::ee4f1f49569f4fbb76f8f34dc7a5f7fe + + + + issn___print::2f1188253ef383b3c1b0e747639468bd + + + + issn___print::5be037642dfc13f2ba256f3b437c52cf + + + + tubitakulakb::154dc946bcecfd68e883ecd325e7a52e + + + + issn___print::7e05f77d163a1df1167892b2d7695f62 + + + + issn___print::ec7a7b0b0fa49b08a97ef3f6f4580849 + + + + doajarticles::0e79708049fc8aba865a72aef296675f + + + + issn___print::8bbe434f556cd8f473ec31870e90dceb + + + + issn___print::19d13e8ec06550805ad45ab6c39a6f5c + + + + doajarticles::42a2923544e8b4e3333cdbad12bf8e62 + + + + doajarticles::c2d19e06a2378a996a656e77621ad231 + + + + doajarticles::1745fdadf3fb137715761516836c86fe + + + + issn___print::edb440c3f196345083f5b6b93fa3f109 + + + + doajarticles::ffda7cfa7049bcee60eb3661f1fc6302 + + + + doajarticles::1ef9c362652cb36c1afae5de6efcf064 + + + + issn___print::e42314b3ca2fd1dc8e2069fcff7c9340 + + + + tubitakulakb::4c4a3f21e09817f9b52025982c5ad422 + + + + doajarticles::ce3bd037d3a68073789b81ee2f6db026 + + + + issn___print::488496b2b983a2e4fc84e0fdbe75bab9 + + + + issn___print::d2ef11cbb7657d6ca546f2f65ae27139 + + + + doajarticles::05ca952a7f3b9fdb6b25ad583eaf936e + + + + 
issn___print::d0cbb94a74ad1929fdce41df87f74ca8 + + + + issn___print::5156aa509e9a536fe0647609c2745ae1 + + + + issn___print::89726c9e90bfbc1dc1b2a23a7461542b + + + + issn___print::30a19d27491936692c9494d975738104 + + + + issn___print::85b683372b35b0ac1ffc3f005fbb5e64 + + + + issn___print::f2f417f07e0dac5721b81791ddb6e8e3 + + + + issn___print::028b2f5ff1f6e88f7ab301198eb439ee + + + + issn___print::d8a99c28771772da62fcce7732c856f9 + + + + doajarticles::539f5377e4c54415583f30f3b3d554a6 + + + + issn___print::ea4548e7a7c10e10ef00cd2a72c3a5ae + + + + issn___print::e33d602ec5bd0d2acae7c14491f2fed8 + + + + issn___print::732726ab6c58c5c242171a80337a63ee + + + + issn__online::53c59f6626ac8e0a55eeaeb90c5d424d + + + + issn___print::d90e317a3bb16d775b33a323955b3f4b + + + + issn___print::973b136a768ef3a79a6c3787444b7609 + + + + doajarticles::854b113d606bbd642b17319a439749d5 + + + + doajarticles::c0b2fa19ade5a8271beabbfea37b2f05 + + + + doajarticles::28006895efd68dbea8d01c41a9071f52 + + + + doajarticles::89ee434c803ac2cf40aa3061e04f8bd1 + + + + doajarticles::1f661b8c3323574bfd3afc0ad25b5d21 + + + + issn___print::ae5f7491d5fdd5d1fdbde58dd07bcb1d + + + + issn___print::17bb1eb108ebf498b7c46f21c1db0d65 + + + + issn___print::1f661b8c3323574bfd3afc0ad25b5d21 + + + + issn___print::bdaa0d91208602d95f66594b823b83b2 + + + + issn___print::9e8d02dd9ace5b0ffa75d88c9cf6fd0d + + + + ekt_journals::623ccc04b6662ad72270cdb48991ecfb + + + + issn__online::71d84e7b1644ae6cfe9bc38bbd401597 + + + + issn___print::74474623c6a870657b26b66ab88497d1 + + + + issn___print::541e430d092ac2733170a924818798ce + + + + issn___print::4594784eb987db460440f12f23b5e6f0 + + + + issn___print::9f8fcf4c8052a20c92b9e95e3be83bdd + + + + issn___print::0f1a67be184f30adcb7333645de30730 + + + + issn___print::098017da85509a583a1126589da11fbf + + + + issn___print::cf88f2df9b3d1031f07c02c43d92990f + + + + issn___print::ad6000958091232ca347c1f3fc2870cf + + + + issn___print::2da390ac801273ff8c74df019664bb06 + + + + 
issn___print::2ea05670ed9dafc3d3a5f2f9a278c787 + + + + issn___print::3d84cb4da9dd9ce42dafb4bd41749bc7 + + + + doajarticles::88dad2df202124b4e8efdf26279f9e08 + + + + doajarticles::50b1ce37427b36368f8f0f1317e47f83 + + + + issn___print::efc4f03c620f90768d85d846606842ba + + + + doajarticles::cf4dc68e46453154a9036d43e11ec1d2 + + + + doajarticles::da91a401ff7bcd79a593f89d2900b97c + + + + issn___print::c700c35cd16f0a1f5f19ea8634398997 + + + + issn__online::5706336ed60abf977e639ed00b507f6b + + + + issn__online::5ade1f356f78826b9f7f9eefc283ec75 + + + + opendoar____::aa495e18c7e3a21a4e48923b92048a61 + + + + issn___print::b9a1ad63db8612bfd4dcbf16be9e710c + + + + issn___print::6a673d2212466f53a4832f0b3d4d9800 + + + + issn__online::0ac3874fe35f4feec07071114b3bfcce + + + + issn__online::83a0220ed6b7ff03dea7aa1648b0a6dd + + + + issn___print::bd8e077363a889231b4c073865ccfd49 + + + + issn___print::2dd67b38137b33a088c57ea46c11e09b + + + + tubitakulakb::29a5b78ea8e203022df9a4397fc15f7c + + + + issn___print::a17503821e6d40aa794881d68b43de4e + + + + issn___print::2684a5315eede2aac88298275a56a7a3 + + + + doajarticles::cf84ce362f9a9fcdf92cc7caba6659d0 + + + + issn___print::d66d935ed1d9193cb4adce9d69b5e5e1 + + + + issn___print::7052e29d1c1ccda6fca247bd7f5220cc + + + + issn___print::39470625e1320f5443bb9734d3ed9754 + + + + issn___print::4175b388ddd0c0dc989a24e5607afeea + + + + issn___print::23057503007ed8003ee76b85092af050 + + + + issn___print::0fda292655c5449f877c78a3be38e658 + + + + issn___print::4d52f987747b37fbccf77d30457a0241 + + + + issn___print::2bf07566da78630b8c5d77ed67747694 + + + + issn___print::2122489d407b48ebfdffbb6aa6db1d0b + + + + issn__online::168e882916d55c390f44107654dedff0 + + + + doajarticles::f9c97816a124bdd3e60bd18f8f19fd90 + + + + issn___print::dfd198025502e93e11725d1ac2cf1487 + + + + issn___print::cbad7267197177d842dfe235bb69f693 + + + + issn___print::6bbac92b6e326f0679e324f92e90af92 + + + + issn___print::8334a639c2701e4c759c82a994b12cf0 + + + + 
issn___print::6f6844f1fb537c8f1551951792b4e470 + + + + issn___print::1cea83dbe32f464d7ca655d58d696e0f + + + + issn___print::a10b5ce7fc7f619b8fe22a38c26ba6d0 + + + + doajarticles::6f6844f1fb537c8f1551951792b4e470 + + + + doajarticles::ed28f28d0d5b17fde968103818b26690 + + + + re3data_____::5b66ed0a185464738b430fcf8a09b506 + + + + issn___print::037d6c2ed4579b18d7527e0b369e5c45 + + + + issn___print::42069521324683ae1f056757824569fa + + + + issn___print::f0892f57d532ed07583f13855e3c0c3b + + + + issn___print::75a8347b12cdc64eb16a466d2492108f + + + + issn__online::e6891a26e0a3a1e6cb95950778f8b4cb + + + + doajarticles::90e8756fd9d4ac0cc6e9f24c1220279c + + + + issn___print::607e56b055572145f31db7b6fa0bc34e + + + + issn___print::948e2b12b0a56f2c848f7cb58fbe2d05 + + + + issn___print::6ce6dc085cdafc874d3cdb38431a362a + + + + issn__online::110f84a256d463653f23e8a14f80a0e7 + + + + issn___print::8b8d184c5bdfa7f3f2b4ddb61e07d23f + + + + issn___print::44b81ba692f3a0df7c9de3a77e469d74 + + + + issn___print::f0bbd9395d0586c4983fa064beb29c71 + + + + issn___print::274b36b0d01c2d879c8135db0427ca3f + + + + issn___print::c270b488c67141597da83d9e32fba4d5 + + + + issn__online::a84ac9fb564bb124f1f7fca5e1d04063 + + + + issn__online::fd2bd37b60a4a7476df36978d4fe17a8 + + + + issn___print::05205fcd55e5e5aed7d6dd82440cece4 + + + + issn___print::9c2034c426356ed2d895b803871bcd96 + + + + issn___print::b0d55c4ecb17d1e2cbdc9840901eabd4 + + + + issn___print::2318cdcb2fb44038656517cfad49e21c + + + + issn___print::42dda4018b67d583260b9f959452d69c + + + + issn__online::534fe5f57bd789f86bda34deb92ae7d4 + + + + doajarticles::8affbbabd9f0f0130b2a80b4e11f1b8e + + + + issn__online::5510f64693d829a3de38c9dd79d8cace + + + + + + naddi + + + + + + From 1b8488976b5d02b39aff35195741b41c963f6e00 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 7 Dec 2022 10:45:38 +0100 Subject: [PATCH 38/55] code formatting --- .../community/QueryInformationSystem.java | 6 +-- .../dhp/bulktag/community/ResultTagger.java | 6 
+-- .../dnetlib/dhp/bulktag/BulkTagJobTest.java | 2 +- .../CommunityConfigurationFactoryTest.java | 38 +++++++++---------- .../provision/IndexRecordTransformerTest.java | 11 +++--- 5 files changed, 32 insertions(+), 31 deletions(-) diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/QueryInformationSystem.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/QueryInformationSystem.java index ec89edc63..678b270a0 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/QueryInformationSystem.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/QueryInformationSystem.java @@ -26,9 +26,9 @@ public class QueryInformationSystem { + " return " + " " + " { $x//CONFIGURATION/context/@id} " - + " " + - "{$x//CONFIGURATION/context/param[./@name='advancedConstraints']/text() }" + - "" + + " " + + "{$x//CONFIGURATION/context/param[./@name='advancedConstraints']/text() }" + + "" + " " + " {for $y in tokenize($subj,',') " + " return " diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java index d6498e942..0452a6ebf 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java @@ -9,16 +9,16 @@ import java.util.*; import java.util.stream.Collectors; import java.util.stream.Stream; -import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.common.ModelSupport; -import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; import org.apache.commons.lang3.StringUtils; import com.google.gson.Gson; import com.jayway.jsonpath.DocumentContext; import com.jayway.jsonpath.JsonPath; +import eu.dnetlib.dhp.schema.common.ModelConstants; +import 
eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; /** Created by miriam on 02/08/2018. */ public class ResultTagger implements Serializable { diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/BulkTagJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/BulkTagJobTest.java index 364e53376..2af3cdb2e 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/BulkTagJobTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/BulkTagJobTest.java @@ -48,7 +48,7 @@ public class BulkTagJobTest { + " \"description\" : \"$['description'][*]['value']\", " + " \"subject\" :\"$['subject'][*]['value']\" , " + - "\"fos\" : \"$['subject'][?(@['qualifier']['classid']=='subject:fos')].value\"} "; + "\"fos\" : \"$['subject'][?(@['qualifier']['classid']=='subject:fos')].value\"} "; private static SparkSession spark; diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/CommunityConfigurationFactoryTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/CommunityConfigurationFactoryTest.java index e3a660835..c8fd62c8e 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/CommunityConfigurationFactoryTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/CommunityConfigurationFactoryTest.java @@ -86,32 +86,32 @@ class CommunityConfigurationFactoryTest { @Test void loadSelCriteriaTest2() throws DocumentException, IOException, SAXException { String xml = IOUtils - .toString( - getClass() - .getResourceAsStream( - "/eu/dnetlib/dhp/bulktag/communityconfiguration/community_configuration_selcrit2.xml")); + .toString( + getClass() + .getResourceAsStream( + "/eu/dnetlib/dhp/bulktag/communityconfiguration/community_configuration_selcrit2.xml")); final CommunityConfiguration cc = 
CommunityConfigurationFactory.newInstance(xml); Map> param = new HashMap<>(); param.put("author", new ArrayList<>(Collections.singletonList("Pippo Pippi"))); param - .put( - "description", - new ArrayList<>( - Collections - .singletonList( - "This work has been partially supported by DARIAH-EU infrastructure"))); + .put( + "description", + new ArrayList<>( + Collections + .singletonList( + "This work has been partially supported by DARIAH-EU infrastructure"))); param - .put( - "contributor", - new ArrayList<>( - Collections - .singletonList( - "Author X helped to write the paper. X works for DARIAH"))); + .put( + "contributor", + new ArrayList<>( + Collections + .singletonList( + "Author X helped to write the paper. X works for DARIAH"))); List comm = cc - .getCommunityForDatasource( - "openaire____::1cfdb2e14977f31a98e0118283401f32", param); + .getCommunityForDatasource( + "openaire____::1cfdb2e14977f31a98e0118283401f32", param); - //TODO add more assertions + // TODO add more assertions Assertions.assertEquals(0, comm.size()); } diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java index d9dc609d8..cd5e08426 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java @@ -131,37 +131,38 @@ public class IndexRecordTransformerTest { @Test public void testForEOSCFutureSoftwareNotebook() throws IOException, TransformerException { final String record = IOUtils - .toString(getClass().getResourceAsStream("eosc-future/software-justthink.xml")); + .toString(getClass().getResourceAsStream("eosc-future/software-justthink.xml")); testRecordTransformation(record); } @Test public void testForEOSCFutureSoftwareNotebookClaim() throws 
IOException, TransformerException { final String record = IOUtils - .toString(getClass().getResourceAsStream("eosc-future/software-justthink-claim.xml")); + .toString(getClass().getResourceAsStream("eosc-future/software-justthink-claim.xml")); testRecordTransformation(record); } @Test public void testForEOSCFutureZenodo7353841() throws IOException, TransformerException { final String record = IOUtils - .toString(getClass().getResourceAsStream("eosc-future/zenodo7353841.xml")); + .toString(getClass().getResourceAsStream("eosc-future/zenodo7353841.xml")); testRecordTransformation(record); } @Test public void testForEOSCFutureZenodo7351393() throws IOException, TransformerException { final String record = IOUtils - .toString(getClass().getResourceAsStream("eosc-future/zenodo7351393.xml")); + .toString(getClass().getResourceAsStream("eosc-future/zenodo7351393.xml")); testRecordTransformation(record); } @Test public void testForEOSCFutureZenodo7351221() throws IOException, TransformerException { final String record = IOUtils - .toString(getClass().getResourceAsStream("eosc-future/zenodo7351221.xml")); + .toString(getClass().getResourceAsStream("eosc-future/zenodo7351221.xml")); testRecordTransformation(record); } + @Test void testDoiUrlNormalization() throws MalformedURLException { From 3d99b78d94a21e1bf4d74532ead853a968cffbec Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 8 Dec 2022 10:25:02 +0100 Subject: [PATCH 39/55] [Cleaning] fixed error in parameter (workingPath to workingDir) --- .../src/main/resources/eu/dnetlib/dhp/bulktag/query.xq | 0 .../eu/dnetlib/dhp/oa/graph/clean/oozie_app/workflow.xml | 8 ++++---- 2 files changed, 4 insertions(+), 4 deletions(-) create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/query.xq diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/query.xq b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/query.xq new file mode 100644 index 
000000000..e69de29bb diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/clean/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/clean/oozie_app/workflow.xml index e756840bd..683c2417b 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/clean/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/clean/oozie_app/workflow.xml @@ -630,7 +630,7 @@ --inputPath${graphOutputPath}/publication --resolvedPath${workingDir}/cfHbResolved/publication - --outputPath${workingPath}/cfHbPatched/publication + --outputPath${workingDir}/cfHbPatched/publication --graphTableClassNameeu.dnetlib.dhp.schema.oaf.Publication --masterDuplicatePath${workingDir}/masterduplicate @@ -657,7 +657,7 @@ --inputPath${graphOutputPath}/dataset --resolvedPath${workingDir}/cfHbResolved/dataset - --outputPath${workingPath}/cfHbPatched/dataset + --outputPath${workingDir}/cfHbPatched/dataset --graphTableClassNameeu.dnetlib.dhp.schema.oaf.Dataset --masterDuplicatePath${workingDir}/masterduplicate @@ -684,7 +684,7 @@ --inputPath${graphOutputPath}/otherresearchproduct --resolvedPath${workingDir}/cfHbResolved/otherresearchproduct - --outputPath${workingPath}/cfHbPatched/otherresearchproduct + --outputPath${workingDir}/cfHbPatched/otherresearchproduct --graphTableClassNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct --masterDuplicatePath${workingDir}/masterduplicate @@ -711,7 +711,7 @@ --inputPath${graphOutputPath}/software --resolvedPath${workingDir}/cfHbResolved/software - --outputPath${workingPath}/cfHbPatched/software + --outputPath${workingDir}/cfHbPatched/software --graphTableClassNameeu.dnetlib.dhp.schema.oaf.Software --masterDuplicatePath${workingDir}/masterduplicate From a485a94956782293d4be3470e366dfb114221755 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 8 Dec 2022 16:59:34 +0100 Subject: [PATCH 40/55] [Cleaning] fixed 
parameter name in property file --- .../eu/dnetlib/dhp/oa/graph/input_clean_cfhb_parameters.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/input_clean_cfhb_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/input_clean_cfhb_parameters.json index 934d173b5..859350bbb 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/input_clean_cfhb_parameters.json +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/input_clean_cfhb_parameters.json @@ -31,7 +31,7 @@ }, { "paramName": "md", - "paramLongName": "datasourceMasterDuplicate", + "paramLongName": "masterDuplicatePath", "paramDescription": "path to the file on HDFS holding the datasource id tuples [master, duplicate]", "paramRequired": true } From 2094fa6db0dda60222ae3e6c0bd5037d39a60e84 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 8 Dec 2022 17:22:26 +0100 Subject: [PATCH 41/55] [cleaning] align wf parameter names --- .../eu/dnetlib/dhp/oa/graph/clean/cfhb/CleanCfHbSparkJob.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/cfhb/CleanCfHbSparkJob.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/cfhb/CleanCfHbSparkJob.java index 122e27dec..eb7325af5 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/cfhb/CleanCfHbSparkJob.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/cfhb/CleanCfHbSparkJob.java @@ -65,8 +65,8 @@ public class CleanCfHbSparkJob { String outputPath = parser.get("outputPath"); log.info("outputPath: {}", outputPath); - String dsMasterDuplicatePath = parser.get("datasourceMasterDuplicate"); - log.info("datasourceMasterDuplicate: {}", dsMasterDuplicatePath); + String dsMasterDuplicatePath = 
parser.get("masterDuplicatePath"); + log.info("masterDuplicatePath: {}", dsMasterDuplicatePath); String graphTableClassName = parser.get("graphTableClassName"); log.info("graphTableClassName: {}", graphTableClassName); From 730228d73dbd6058cb0b2283412d9f3238dddc7a Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 8 Dec 2022 18:40:22 +0100 Subject: [PATCH 42/55] [cleaning] align wf parameter names in test --- .../dnetlib/dhp/oa/graph/clean/cfhb/CleanCfHbSparkJobTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/cfhb/CleanCfHbSparkJobTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/cfhb/CleanCfHbSparkJobTest.java index b0097ed6f..9096180ef 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/cfhb/CleanCfHbSparkJobTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/cfhb/CleanCfHbSparkJobTest.java @@ -143,7 +143,7 @@ public class CleanCfHbSparkJobTest { "--outputPath", outputPath, "--resolvedPath", resolvedPath + "/dataset", "--graphTableClassName", Dataset.class.getCanonicalName(), - "--datasourceMasterDuplicate", dsMasterDuplicatePath + "--masterDuplicatePath", dsMasterDuplicatePath }); assertTrue(Files.exists(Paths.get(graphOutputPath, "dataset"))); From 389dd25430c3fa193d93f74523f61a204fe4f50f Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 8 Dec 2022 18:40:48 +0100 Subject: [PATCH 43/55] [cleaning] avoid NPE --- .../dhp/oa/graph/clean/cfhb/CleanCfHbSparkJob.java | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/cfhb/CleanCfHbSparkJob.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/cfhb/CleanCfHbSparkJob.java index eb7325af5..f48226d71 100644 --- 
a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/cfhb/CleanCfHbSparkJob.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/cfhb/CleanCfHbSparkJob.java @@ -13,6 +13,7 @@ import java.util.stream.Stream; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; import org.apache.spark.SparkConf; +import org.apache.spark.api.java.function.FilterFunction; import org.apache.spark.api.java.function.FlatMapFunction; import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.api.java.function.MapGroupsFunction; @@ -105,6 +106,7 @@ public class CleanCfHbSparkJob { resolved .joinWith(md, resolved.col("cfhb").equalTo(md.col("duplicateId"))) .map(asIdCfHbMapping(), Encoders.bean(IdCfHbMapping.class)) + .filter((FilterFunction) m -> Objects.nonNull(m.getMasterId())) .write() .mode(SaveMode.Overwrite) .json(resolvedPath); @@ -134,9 +136,15 @@ public class CleanCfHbSparkJob { private static MapFunction, IdCfHbMapping> asIdCfHbMapping() { return t -> { - t._1().setMasterId(t._2().getMasterId()); - t._1().setMasterName(t._2().getMasterName()); - return t._1(); + final IdCfHbMapping mapping = t._1(); + Optional + .ofNullable(t._2()) + .ifPresent(t2 -> { + mapping.setMasterId(t2.getMasterId()); + mapping.setMasterName(t2.getMasterName()); + + }); + return mapping; }; } From 8b44afe5e547612c5922e8a76dbf2f1ade5ceb23 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 9 Dec 2022 15:44:57 +0100 Subject: [PATCH 44/55] [cleaning] avoid NPE --- .../eu/dnetlib/dhp/oa/graph/clean/cfhb/CleanCfHbSparkJob.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/cfhb/CleanCfHbSparkJob.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/cfhb/CleanCfHbSparkJob.java index f48226d71..7cc26745b 100644 --- 
a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/cfhb/CleanCfHbSparkJob.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/cfhb/CleanCfHbSparkJob.java @@ -157,7 +157,8 @@ public class CleanCfHbSparkJob { r.getInstance().stream().map(Instance::getHostedby).map(KeyValue::getKey), r.getInstance().stream().map(Instance::getCollectedfrom).map(KeyValue::getKey))) .distinct() - .map(s -> asIdCfHbMapping(r.getId(), s)) + .filter(StringUtils::isNotBlank) + .map(cfHb -> asIdCfHbMapping(r.getId(), cfHb)) .iterator(); } From c18b8048c378963641801caaff73cd708bfce5de Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Sat, 10 Dec 2022 11:41:38 +0100 Subject: [PATCH 45/55] [cleaning] avoid NPE --- .../graph/clean/cfhb/CleanCfHbSparkJob.java | 27 ++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/cfhb/CleanCfHbSparkJob.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/cfhb/CleanCfHbSparkJob.java index 7cc26745b..531b415ed 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/cfhb/CleanCfHbSparkJob.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/cfhb/CleanCfHbSparkJob.java @@ -151,11 +151,32 @@ public class CleanCfHbSparkJob { private static FlatMapFunction flattenCfHbFn() { return r -> Stream .concat( - r.getCollectedfrom().stream().map(KeyValue::getKey), + Optional + .ofNullable(r.getCollectedfrom()) + .map(cf -> cf.stream().map(KeyValue::getKey)) + .orElse(Stream.empty()), Stream .concat( - r.getInstance().stream().map(Instance::getHostedby).map(KeyValue::getKey), - r.getInstance().stream().map(Instance::getCollectedfrom).map(KeyValue::getKey))) + Optional + .ofNullable(r.getInstance()) + .map( + instances -> instances + .stream() + .map(i -> 
Optional.ofNullable(i.getHostedby()).map(KeyValue::getKey).orElse(""))) + .orElse(Stream.empty()) + .filter(StringUtils::isNotBlank), + Optional + .ofNullable(r.getInstance()) + .map( + instances -> instances + .stream() + .map( + i -> Optional + .ofNullable(i.getCollectedfrom()) + .map(KeyValue::getKey) + .orElse(""))) + .orElse(Stream.empty()) + .filter(StringUtils::isNotBlank))) .distinct() .filter(StringUtils::isNotBlank) .map(cfHb -> asIdCfHbMapping(r.getId(), cfHb)) From b8bafab8a0f380befb8544d2ffe44f898b89ffab Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Mon, 12 Dec 2022 14:43:03 +0100 Subject: [PATCH 46/55] [cleaning] improved vocabulary based mapping, specialization for the strict vocab cleaning --- .../dhp/common/vocabulary/Vocabulary.java | 27 ++++++++++++++----- .../oaf/utils/GraphCleaningFunctions.java | 2 +- .../dhp/oa/graph/clean/CleaningRuleMap.java | 2 +- .../dhp/oa/graph/clean/CleanContextTest.java | 8 +++--- .../dhp/oa/graph/clean/CleanCountryTest.java | 12 ++++----- .../clean/GraphCleaningFunctionsTest.java | 24 ++++++++++------- .../dnetlib/dhp/oa/graph/raw/MappersTest.java | 21 ++++++++++++--- .../eu/dnetlib/dhp/oa/graph/clean/result.json | 15 +++++------ 8 files changed, 71 insertions(+), 40 deletions(-) diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/vocabulary/Vocabulary.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/vocabulary/Vocabulary.java index 3a8df5c9e..2ab23bda6 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/vocabulary/Vocabulary.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/vocabulary/Vocabulary.java @@ -4,6 +4,7 @@ package eu.dnetlib.dhp.common.vocabulary; import java.io.Serializable; import java.util.HashMap; import java.util.Map; +import java.util.Objects; import java.util.Optional; import org.apache.commons.lang3.StringUtils; @@ -66,27 +67,39 @@ public class Vocabulary implements Serializable { } public Qualifier getTermAsQualifier(final String termId) { - if 
(StringUtils.isBlank(termId)) { + return getTermAsQualifier(termId, false); + } + + public Qualifier getTermAsQualifier(final String termId, boolean strict) { + final VocabularyTerm term = getTerm(termId); + if (Objects.nonNull(term)) { + return OafMapperUtils.qualifier(term.getId(), term.getName(), getId(), getName()); + } else if (Objects.isNull(term) && strict) { return OafMapperUtils.unknown(getId(), getName()); - } else if (termExists(termId)) { - final VocabularyTerm t = getTerm(termId); - return OafMapperUtils.qualifier(t.getId(), t.getName(), getId(), getName()); } else { return OafMapperUtils.qualifier(termId, termId, getId(), getName()); } } public Qualifier getSynonymAsQualifier(final String syn) { + return getSynonymAsQualifier(syn, false); + } + + public Qualifier getSynonymAsQualifier(final String syn, boolean strict) { return Optional .ofNullable(getTermBySynonym(syn)) - .map(term -> getTermAsQualifier(term.getId())) + .map(term -> getTermAsQualifier(term.getId(), strict)) .orElse(null); } public Qualifier lookup(String id) { + return lookup(id, false); + } + + public Qualifier lookup(String id, boolean strict) { return Optional - .ofNullable(getSynonymAsQualifier(id)) - .orElse(getTermAsQualifier(id)); + .ofNullable(getSynonymAsQualifier(id, strict)) + .orElse(getTermAsQualifier(id, strict)); } } diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java index 363f95423..347d3eb20 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java @@ -333,7 +333,7 @@ public class GraphCleaningFunctions extends CleaningFunctions { if (Objects.isNull(i.getHostedby()) || StringUtils.isBlank(i.getHostedby().getKey())) { i.setHostedby(ModelConstants.UNKNOWN_REPOSITORY); } - if (Objects.isNull(i.getRefereed())) { 
+ if (Objects.isNull(i.getRefereed()) || StringUtils.isBlank(i.getRefereed().getClassid())) { i.setRefereed(qualifier("0000", "Unknown", ModelConstants.DNET_REVIEW_LEVELS)); } if (Objects.nonNull(i.getDateofacceptance())) { diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningRuleMap.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningRuleMap.java index 5f3b4e1ca..3d501bb27 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningRuleMap.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningRuleMap.java @@ -42,7 +42,7 @@ public class CleaningRuleMap extends HashMap, SerializableConsumer { if (ModelConstants.DNET_SUBJECT_KEYWORD.equalsIgnoreCase(subject.getQualifier().getClassid())) { - Qualifier newValue = vocabulary.lookup(subject.getValue()); + Qualifier newValue = vocabulary.lookup(subject.getValue(), true); if (!ModelConstants.UNKNOWN.equals(newValue.getClassid())) { subject.setValue(newValue.getClassid()); subject.getQualifier().setClassid(vocabularyId); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/CleanContextTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/CleanContextTest.java index e206c7c5a..91094f534 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/CleanContextTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/CleanContextTest.java @@ -82,10 +82,10 @@ public class CleanContextTest { CleanContextSparkJob.main(new String[] { "--isSparkSessionManaged", Boolean.FALSE.toString(), "--inputPath", workingDir.toString() + "/publication", - "-graphTableClassName", Publication.class.getCanonicalName(), - "-workingPath", workingDir.toString() + "/working", - "-contextId", "sobigdata", - "-verifyParam", "gCube " + "--graphTableClassName", 
Publication.class.getCanonicalName(), + "--workingDir", workingDir.toString() + "/working", + "--contextId", "sobigdata", + "--verifyParam", "gCube " }); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/CleanCountryTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/CleanCountryTest.java index c9f846570..caf19c21c 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/CleanCountryTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/CleanCountryTest.java @@ -83,12 +83,12 @@ public class CleanCountryTest { CleanCountrySparkJob.main(new String[] { "--isSparkSessionManaged", Boolean.FALSE.toString(), "--inputPath", workingDir.toString() + "/publication", - "-graphTableClassName", Publication.class.getCanonicalName(), - "-workingPath", workingDir.toString() + "/working", - "-country", "NL", - "-verifyParam", "10.17632", - "-collectedfrom", "NARCIS", - "-hostedBy", getClass() + "--graphTableClassName", Publication.class.getCanonicalName(), + "--workingDir", workingDir.toString() + "/working", + "--country", "NL", + "--verifyParam", "10.17632", + "--collectedfrom", "NARCIS", + "--hostedBy", getClass() .getResource("/eu/dnetlib/dhp/oa/graph/clean/hostedBy") .getPath() }); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java index 4035307e5..fc7c6e5f1 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java @@ -7,6 +7,7 @@ import static org.mockito.Mockito.lenient; import java.io.IOException; import 
java.util.Collection; import java.util.List; +import java.util.Optional; import java.util.Set; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -278,20 +279,25 @@ public class GraphCleaningFunctionsTest { s -> "0102 computer and information sciences".equals(s.getValue()) & ModelConstants.DNET_SUBJECT_FOS_CLASSID.equals(s.getQualifier().getClassid()))); - List s1 = p_cleaned - .getSubject() - .stream() - .filter(s -> s.getValue().equals("In Situ Hybridization")) - .collect(Collectors.toList()); - assertNotNull(s1); - assertEquals(1, s1.size()); - assertEquals(ModelConstants.DNET_SUBJECT_KEYWORD, s1.get(0).getQualifier().getClassid()); - assertEquals(ModelConstants.DNET_SUBJECT_KEYWORD, s1.get(0).getQualifier().getClassname()); + verify_keyword(p_cleaned, "In Situ Hybridization"); + verify_keyword(p_cleaned, "Avicennia"); // TODO add more assertions to verity the cleaned values System.out.println(MAPPER.writeValueAsString(p_cleaned)); } + private static void verify_keyword(Publication p_cleaned, String subject) { + Optional s1 = p_cleaned + .getSubject() + .stream() + .filter(s -> s.getValue().equals(subject)) + .findFirst(); + + assertTrue(s1.isPresent()); + assertEquals(ModelConstants.DNET_SUBJECT_KEYWORD, s1.get().getQualifier().getClassid()); + assertEquals(ModelConstants.DNET_SUBJECT_KEYWORD, s1.get().getQualifier().getClassname()); + } + private Stream getAuthorPidTypes(Result pub) { return pub .getAuthor() diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java index ad6ceef54..d08545388 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java @@ -26,6 +26,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; import 
eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.utils.GraphCleaningFunctions; import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; import eu.dnetlib.dhp.schema.oaf.utils.PidType; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; @@ -238,7 +239,11 @@ class MappersTest { assertNotNull(i.getAccessright()); assertEquals("OPEN", i.getAccessright().getClassid()); }); - assertEquals("UNKNOWN", p.getInstance().get(0).getRefereed().getClassid()); + + Publication p_cleaned = cleanup(p, vocs); + assertEquals("0000", p_cleaned.getInstance().get(0).getRefereed().getClassid()); + assertEquals("Unknown", p_cleaned.getInstance().get(0).getRefereed().getClassname()); + assertNotNull(p.getInstance().get(0).getPid()); assertEquals(2, p.getInstance().get(0).getPid().size()); @@ -453,7 +458,10 @@ class MappersTest { assertNotNull(i.getAccessright()); assertEquals("OPEN", i.getAccessright().getClassid()); }); - assertEquals("UNKNOWN", p.getInstance().get(0).getRefereed().getClassid()); + + Publication p_cleaned = cleanup(p, vocs); + assertEquals("0000", p_cleaned.getInstance().get(0).getRefereed().getClassid()); + assertEquals("Unknown", p_cleaned.getInstance().get(0).getRefereed().getClassname()); } @Test @@ -570,7 +578,9 @@ class MappersTest { assertTrue(i.getUrl().contains("http://apps.who.int/trialsearch/Trial3.aspx?trialid=NCT02321059")); assertTrue(i.getUrl().contains("https://clinicaltrials.gov/ct2/show/NCT02321059")); - assertEquals("UNKNOWN", i.getRefereed().getClassid()); + Dataset d_cleaned = cleanup(d, vocs); + assertEquals("0000", d_cleaned.getInstance().get(0).getRefereed().getClassid()); + assertEquals("Unknown", d_cleaned.getInstance().get(0).getRefereed().getClassname()); } @Test @@ -871,7 +881,10 @@ class MappersTest { assertNotNull(i.getAccessright()); assertEquals("UNKNOWN", i.getAccessright().getClassid()); }); - 
assertEquals("UNKNOWN", p.getInstance().get(0).getRefereed().getClassid()); + + Dataset p_cleaned = cleanup(p, vocs); + assertEquals("0000", p_cleaned.getInstance().get(0).getRefereed().getClassid()); + assertEquals("Unknown", p_cleaned.getInstance().get(0).getRefereed().getClassname()); } @Test diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/result.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/result.json index 84ff35c08..8f35470e1 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/result.json +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/result.json @@ -907,24 +907,23 @@ { "dataInfo": { "deletedbyinference": false, - "inferenceprovenance": "", "inferred": false, "invisible": false, "provenanceaction": { - "classid": "sysimport:crosswalk:datasetarchive", - "classname": "sysimport:crosswalk:datasetarchive", + "classid": "sysimport:actionset", + "classname": "Harvested", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions" }, "trust": "0.9" }, "qualifier": { - "classid": "", - "classname": "", - "schemeid": "", - "schemename": "" + "classid": "FOS", + "classname": "Fields of Science and Technology classification", + "schemeid": "dnet:subject_classification_typologies", + "schemename": "dnet:subject_classification_typologies" }, - "value": "doped silicon" + "value": "Avicennia" }, { "dataInfo": { From 7b80b24f8268b5283a56867feee02753f5fd516b Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 15 Dec 2022 14:49:04 +0100 Subject: [PATCH 47/55] [cleaning] country cleaning must use both PID and AlternateIdentifier fields --- .../clean/country/CleanCountrySparkJob.java | 44 ++++++++++++++++++- 1 file changed, 42 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/country/CleanCountrySparkJob.java 
b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/country/CleanCountrySparkJob.java index d8d803458..37e693de9 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/country/CleanCountrySparkJob.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/country/CleanCountrySparkJob.java @@ -4,9 +4,12 @@ package eu.dnetlib.dhp.oa.graph.clean.country; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.io.Serializable; +import java.util.Collection; import java.util.List; import java.util.Optional; +import java.util.function.Function; import java.util.stream.Collectors; +import java.util.stream.Stream; import javax.swing.text.html.Option; @@ -30,6 +33,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.oa.graph.clean.CleanContextSparkJob; import eu.dnetlib.dhp.schema.oaf.Country; +import eu.dnetlib.dhp.schema.oaf.Instance; import eu.dnetlib.dhp.schema.oaf.Result; import eu.dnetlib.dhp.schema.oaf.StructuredProperty; import eu.dnetlib.dhp.schema.oaf.utils.PidType; @@ -110,8 +114,8 @@ public class CleanCountrySparkJob implements Serializable { return r; } - if (r - .getPid() + List ids = getPidsAndAltIds(r).collect(Collectors.toList()); + if (ids .stream() .anyMatch( p -> p @@ -148,6 +152,42 @@ public class CleanCountrySparkJob implements Serializable { .json(inputPath); } + private static Stream getPidsAndAltIds(T r) { + final Stream resultPids = Optional + .ofNullable(r.getPid()) + .map(Collection::stream) + .orElse(Stream.empty()); + + final Stream instancePids = Optional + .ofNullable(r.getInstance()) + .map( + instance -> instance + .stream() + .flatMap( + i -> Optional + .ofNullable(i.getPid()) + .map(Collection::stream) + .orElse(Stream.empty()))) + .orElse(Stream.empty()); + + final Stream instanceAltIds = Optional + .ofNullable(r.getInstance()) + .map( + 
instance -> instance + .stream() + .flatMap( + i -> Optional + .ofNullable(i.getAlternateIdentifier()) + .map(Collection::stream) + .orElse(Stream.empty()))) + .orElse(Stream.empty()); + + return Stream + .concat( + Stream.concat(resultPids, instancePids), + instanceAltIds); + } + private static boolean pidInParam(String value, String[] verifyParam) { for (String s : verifyParam) if (value.startsWith(s)) From d791840b8202a587ca5e7193e18d1ec9de576ebc Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 16 Dec 2022 13:18:29 +0100 Subject: [PATCH 48/55] [Clean Country] added test to verify remove of country: --- .../dhp/oa/graph/clean/CleanCountryTest.java | 39 +++++++++++++++++++ .../oa/graph/clean/dataset_clean_country.json | 0 2 files changed, 39 insertions(+) create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/dataset_clean_country.json diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/CleanCountryTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/CleanCountryTest.java index c9f846570..c964a3e41 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/CleanCountryTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/CleanCountryTest.java @@ -5,6 +5,7 @@ import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; +import eu.dnetlib.dhp.schema.oaf.Dataset; import org.apache.commons.io.FileUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; @@ -147,4 +148,42 @@ public class CleanCountryTest { .size()); } + @Test + public void testDatasetClean() throws Exception { + final String sourcePath = getClass() + .getResource("/eu/dnetlib/dhp/oa/graph/clean/dataset_clean_country.json") + .getPath(); + + spark + .read() + .textFile(sourcePath) + .map( + (MapFunction) r -> OBJECT_MAPPER.readValue(r, Dataset.class), + 
Encoders.bean(Dataset.class)) + .write() + .json(workingDir.toString() + "/dataset"); + + CleanCountrySparkJob.main(new String[] { + "--isSparkSessionManaged", Boolean.FALSE.toString(), + "--inputPath", workingDir.toString() + "/dataset", + "-graphTableClassName", Dataset.class.getCanonicalName(), + "-workingPath", workingDir.toString() + "/working", + "-country", "NL", + "-verifyParam", "10.17632", + "-collectedfrom", "NARCIS", + "-hostedBy", getClass() + .getResource("/eu/dnetlib/dhp/oa/graph/clean/hostedBy") + .getPath() + }); + + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/dataset") + .map(item -> OBJECT_MAPPER.readValue(item, Dataset.class)); + + Assertions.assertEquals(1, tmp.count()); + + + } + } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/dataset_clean_country.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/dataset_clean_country.json new file mode 100644 index 000000000..e69de29bb From 8685eaa7064809e0ce13466b86973e76df353c49 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 16 Dec 2022 15:31:25 +0100 Subject: [PATCH 49/55] [Clean Country] added test to verify remove of country --- .../java/eu/dnetlib/dhp/oa/graph/clean/CleanCountryTest.java | 4 +++- .../eu/dnetlib/dhp/oa/graph/clean/dataset_clean_country.json | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/CleanCountryTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/CleanCountryTest.java index 651047b83..de9e4fc90 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/CleanCountryTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/CleanCountryTest.java @@ -167,7 +167,7 @@ public class CleanCountryTest { 
"--isSparkSessionManaged", Boolean.FALSE.toString(), "--inputPath", workingDir.toString() + "/dataset", "-graphTableClassName", Dataset.class.getCanonicalName(), - "-workingPath", workingDir.toString() + "/working", + "-workingDir", workingDir.toString() + "/working", "-country", "NL", "-verifyParam", "10.17632", "-collectedfrom", "NARCIS", @@ -183,6 +183,8 @@ public class CleanCountryTest { Assertions.assertEquals(1, tmp.count()); + Assertions.assertEquals(0, tmp.first().getCountry().size()); + } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/dataset_clean_country.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/dataset_clean_country.json index e69de29bb..f5c1fc334 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/dataset_clean_country.json +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/dataset_clean_country.json @@ -0,0 +1 @@ +{"geolocation": [], "dataInfo": {"provenanceaction": {"classid": "sysimport:dedup", "classname": "sysimport:dedup", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "dedup-result-decisiontree-v3", "invisible": false, "trust": "0.8"}, "resourcetype": {"classid": "dataset", "classname": "dataset", "schemeid": "dnet:dataCite_resource", "schemename": "dnet:dataCite_resource"}, "pid": [{"dataInfo": {"invisible": false, "provenanceaction": {"classid": "sysimport:actionset", "classname": "Harvested", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "trust": "0.9", "inferred": false, "deletedbyinference": false}, "qualifier": {"classid": "doi", "classname": "Digital Object Identifier", "schemeid": "dnet:pid_types", "schemename": "dnet:pid_types"}, "value": "10.17632/v6cgs4jpbk.2"}, {"dataInfo": {"invisible": false, "provenanceaction": {"classid": 
"sysimport:actionset", "classname": "Harvested", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "trust": "0.9", "inferred": false, "deletedbyinference": false}, "qualifier": {"classid": "doi", "classname": "Digital Object Identifier", "schemeid": "dnet:pid_types", "schemename": "dnet:pid_types"}, "value": "10.17632/v6cgs4jpbk.3"}, {"dataInfo": {"invisible": false, "provenanceaction": {"classid": "sysimport:actionset", "classname": "Harvested", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "trust": "0.9", "inferred": false, "deletedbyinference": false}, "qualifier": {"classid": "doi", "classname": "Digital Object Identifier", "schemeid": "dnet:pid_types", "schemename": "dnet:pid_types"}, "value": "10.17632/v6cgs4jpbk"}, {"dataInfo": {"invisible": false, "provenanceaction": {"classid": "sysimport:actionset", "classname": "Harvested", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "trust": "0.9", "inferred": false, "deletedbyinference": false}, "qualifier": {"classid": "doi", "classname": "Digital Object Identifier", "schemeid": "dnet:pid_types", "schemename": "dnet:pid_types"}, "value": "10.17632/v6cgs4jpbk.1"}], "contributor": [{"dataInfo": {"invisible": false, "provenanceaction": {"classid": "sysimport:crosswalk:repository", "classname": "Harvested", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "trust": "0.9", "inferred": false, "deletedbyinference": false}, "value": "somok bhowmik"}], "bestaccessright": {"classid": "OPEN", "classname": "Open Access", "schemeid": "dnet:access_modes", "schemename": "dnet:access_modes"}, "relevantdate": [{"qualifier": {"classid": "issued", "classname": "issued", "schemeid": "dnet:dataCite_date", "schemename": "dnet:dataCite_date"}, "value": "2022-08-16"}, {"qualifier": {"classid": "available", "classname": "available", "schemeid": "dnet:dataCite_date", "schemename": "dnet:dataCite_date"}, "value": 
"2023-08-23"}, {"dataInfo": {"invisible": false, "provenanceaction": {"classid": "sysimport:crosswalk:repository", "classname": "Harvested", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "trust": "0.9", "inferred": false, "deletedbyinference": false}, "qualifier": {"classid": "UNKNOWN", "classname": "UNKNOWN", "schemeid": "dnet:dataCite_date", "schemename": "dnet:dataCite_date"}, "value": "2022-08-16"}, {"dataInfo": {"invisible": false, "provenanceaction": {"classid": "sysimport:crosswalk:repository", "classname": "Harvested", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "trust": "0.9", "inferred": false, "deletedbyinference": false}, "qualifier": {"classid": "available", "classname": "available", "schemeid": "dnet:dataCite_date", "schemename": "dnet:dataCite_date"}, "value": "2022-08-16"}], "collectedfrom": [{"key": "10|openaire____::9e3be59865b2c1c335d32dae2fe7b254", "value": "Datacite"}, {"key": "10|openaire____::c6df70599aa984f16ee52b4b86d2e89f", "value": "DANS (Data Archiving and Networked Services)"}, {"key": "10|eurocrisdris::fe4903425d9040f680d8610d9079ea14", "value": "NARCIS"}, {"key": "10|re3data_____::84e123776089ce3c7a33db98d9cd15a8", "value": "EASY"}], "id": "50|doi_dedup___::e04c8cbefb6f0b8378a04c57e6edbd82", "subject": [{"dataInfo": {"provenanceaction": {"classid": "sysimport:crosswalk:datasetarchive", "classname": "Harvested", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.9"}, "qualifier": {"classid": "keyword", "classname": "keyword", "schemeid": "dnet:subject_classification_typologies", "schemename": "dnet:subject_classification_typologies"}, "value": "Interdisciplinary sciences"}], "lastupdatetimestamp": 1670362508719, "author": [{"surname": "Bhowmik", "name": "S.", "pid": [], "rank": 1, "affiliation": [{"dataInfo": {"invisible": false, 
"provenanceaction": {"classid": "sysimport:crosswalk:repository", "classname": "Harvested", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "trust": "0.9", "inferred": false, "deletedbyinference": false}, "value": "via Mendeley Data"}], "fullname": "bhowmik, S"}], "instance": [{"refereed": {"classid": "0000", "classname": "Unknown", "schemeid": "dnet:review_levels", "schemename": "dnet:review_levels"}, "hostedby": {"key": "10|re3data_____::db814dc656a911b556dba42a331cebe9", "value": "Mendeley Data"}, "license": {"value": "https://creativecommons.org/licenses/by/4.0/legalcode"}, "url": ["https://dx.doi.org/10.17632/v6cgs4jpbk.2"], "pid": [{"dataInfo": {"invisible": false, "provenanceaction": {"classid": "sysimport:actionset", "classname": "Harvested", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "trust": "0.9", "inferred": false, "deletedbyinference": false}, "qualifier": {"classid": "doi", "classname": "Digital Object Identifier", "schemeid": "dnet:pid_types", "schemename": "dnet:pid_types"}, "value": "10.17632/v6cgs4jpbk.2"}], "dateofacceptance": {"value": "2022-08-16"}, "collectedfrom": {"key": "10|openaire____::9e3be59865b2c1c335d32dae2fe7b254", "value": "Datacite"}, "accessright": {"classid": "UNKNOWN", "classname": "not available", "schemeid": "dnet:access_modes", "schemename": "dnet:access_modes"}, "instancetype": {"classid": "0021", "classname": "Dataset", "schemeid": "dnet:publication_resource", "schemename": "dnet:publication_resource"}}, {"refereed": {"classid": "0000", "classname": "Unknown", "schemeid": "dnet:review_levels", "schemename": "dnet:review_levels"}, "hostedby": {"key": "10|re3data_____::db814dc656a911b556dba42a331cebe9", "value": "Mendeley Data"}, "url": ["https://dx.doi.org/10.17632/v6cgs4jpbk.3"], "pid": [{"dataInfo": {"invisible": false, "provenanceaction": {"classid": "sysimport:actionset", "classname": "Harvested", "schemeid": "dnet:provenanceActions", "schemename": 
"dnet:provenanceActions"}, "trust": "0.9", "inferred": false, "deletedbyinference": false}, "qualifier": {"classid": "doi", "classname": "Digital Object Identifier", "schemeid": "dnet:pid_types", "schemename": "dnet:pid_types"}, "value": "10.17632/v6cgs4jpbk.3"}], "dateofacceptance": {"value": "2023-01-01"}, "collectedfrom": {"key": "10|openaire____::9e3be59865b2c1c335d32dae2fe7b254", "value": "Datacite"}, "accessright": {"classid": "UNKNOWN", "classname": "not available", "schemeid": "dnet:access_modes", "schemename": "dnet:access_modes"}, "instancetype": {"classid": "0021", "classname": "Dataset", "schemeid": "dnet:publication_resource", "schemename": "dnet:publication_resource"}}, {"refereed": {"classid": "0000", "classname": "Unknown", "schemeid": "dnet:review_levels", "schemename": "dnet:review_levels"}, "hostedby": {"key": "10|re3data_____::db814dc656a911b556dba42a331cebe9", "value": "Mendeley Data"}, "url": ["https://dx.doi.org/10.17632/v6cgs4jpbk"], "pid": [{"dataInfo": {"invisible": false, "provenanceaction": {"classid": "sysimport:actionset", "classname": "Harvested", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "trust": "0.9", "inferred": false, "deletedbyinference": false}, "qualifier": {"classid": "doi", "classname": "Digital Object Identifier", "schemeid": "dnet:pid_types", "schemename": "dnet:pid_types"}, "value": "10.17632/v6cgs4jpbk"}], "dateofacceptance": {"value": "2023-01-01"}, "collectedfrom": {"key": "10|openaire____::9e3be59865b2c1c335d32dae2fe7b254", "value": "Datacite"}, "accessright": {"classid": "UNKNOWN", "classname": "not available", "schemeid": "dnet:access_modes", "schemename": "dnet:access_modes"}, "instancetype": {"classid": "0021", "classname": "Dataset", "schemeid": "dnet:publication_resource", "schemename": "dnet:publication_resource"}}, {"refereed": {"classid": "0000", "classname": "Unknown", "schemeid": "dnet:review_levels", "schemename": "dnet:review_levels"}, "hostedby": {"key": 
"10|re3data_____::db814dc656a911b556dba42a331cebe9", "value": "Mendeley Data"}, "url": ["https://dx.doi.org/10.17632/v6cgs4jpbk.1"], "pid": [{"dataInfo": {"invisible": false, "provenanceaction": {"classid": "sysimport:actionset", "classname": "Harvested", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "trust": "0.9", "inferred": false, "deletedbyinference": false}, "qualifier": {"classid": "doi", "classname": "Digital Object Identifier", "schemeid": "dnet:pid_types", "schemename": "dnet:pid_types"}, "value": "10.17632/v6cgs4jpbk.1"}], "dateofacceptance": {"value": "2023-01-01"}, "collectedfrom": {"key": "10|openaire____::9e3be59865b2c1c335d32dae2fe7b254", "value": "Datacite"}, "accessright": {"classid": "UNKNOWN", "classname": "not available", "schemeid": "dnet:access_modes", "schemename": "dnet:access_modes"}, "instancetype": {"classid": "0021", "classname": "Dataset", "schemeid": "dnet:publication_resource", "schemename": "dnet:publication_resource"}}, {"refereed": {"classid": "0000", "classname": "UNKNOWN", "schemeid": "dnet:review_levels", "schemename": "dnet:review_levels"}, "hostedby": {"key": "10|openaire____::c6df70599aa984f16ee52b4b86d2e89f", "value": "DANS (Data Archiving and Networked Services)"}, "url": ["https://doi.org/10.17632/v6cgs4jpbk.2"], "pid": [], "alternateIdentifier": [{"dataInfo": {"provenanceaction": {"classid": "sysimport:crosswalk:datasetarchive", "classname": "Harvested", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.9"}, "qualifier": {"classid": "urn", "classname": "urn", "schemeid": "dnet:pid_types", "schemename": "dnet:pid_types"}, "value": "urn:nbn:nl:ui:13-q1-3218"}, {"dataInfo": {"provenanceaction": {"classid": "sysimport:crosswalk:datasetarchive", "classname": "Harvested", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, 
"deletedbyinference": false, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.9"}, "qualifier": {"classid": "doi", "classname": "Digital Object Identifier", "schemeid": "dnet:pid_types", "schemename": "dnet:pid_types"}, "value": "10.17632/v6cgs4jpbk.2"}], "collectedfrom": {"key": "10|openaire____::c6df70599aa984f16ee52b4b86d2e89f", "value": "DANS (Data Archiving and Networked Services)"}, "accessright": {"classid": "UNKNOWN", "classname": "not available", "schemeid": "dnet:access_modes", "schemename": "dnet:access_modes"}, "instancetype": {"classid": "0021", "classname": "Dataset", "schemeid": "dnet:publication_resource", "schemename": "dnet:publication_resource"}}, {"refereed": {"classid": "0000", "classname": "UNKNOWN", "schemeid": "dnet:review_levels", "schemename": "dnet:review_levels"}, "hostedby": {"key": "10|eurocrisdris::fe4903425d9040f680d8610d9079ea14", "value": "NARCIS"}, "url": ["https://doi.org/10.17632/v6cgs4jpbk.2"], "pid": [], "alternateIdentifier": [{"dataInfo": {"provenanceaction": {"classid": "sysimport:crosswalk:datasetarchive", "classname": "Harvested", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.9"}, "qualifier": {"classid": "urn", "classname": "urn", "schemeid": "dnet:pid_types", "schemename": "dnet:pid_types"}, "value": "urn:nbn:nl:ui:13-q1-3218"}, {"dataInfo": {"provenanceaction": {"classid": "sysimport:crosswalk:datasetarchive", "classname": "Harvested", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.9"}, "qualifier": {"classid": "doi", "classname": "Digital Object Identifier", "schemeid": "dnet:pid_types", "schemename": "dnet:pid_types"}, "value": "10.17632/v6cgs4jpbk.2"}], "collectedfrom": {"key": 
"10|eurocrisdris::fe4903425d9040f680d8610d9079ea14", "value": "NARCIS"}, "accessright": {"classid": "OPEN", "classname": "Open Access", "schemeid": "dnet:access_modes", "schemename": "dnet:access_modes"}, "instancetype": {"classid": "0021", "classname": "Dataset", "schemeid": "dnet:publication_resource", "schemename": "dnet:publication_resource"}}, {"refereed": {"classid": "0000", "classname": "UNKNOWN", "schemeid": "dnet:review_levels", "schemename": "dnet:review_levels"}, "hostedby": {"key": "10|re3data_____::84e123776089ce3c7a33db98d9cd15a8", "value": "EASY"}, "license": {"dataInfo": {"invisible": false, "provenanceaction": {"classid": "sysimport:crosswalk:repository", "classname": "Harvested", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "trust": "0.9", "inferred": false, "deletedbyinference": false}, "value": "http://creativecommons.org/licenses/by/4.0"}, "url": ["https://doi.org/10.17632/v6cgs4jpbk.2"], "pid": [], "alternateIdentifier": [{"dataInfo": {"invisible": false, "provenanceaction": {"classid": "sysimport:crosswalk:repository", "classname": "Harvested", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "trust": "0.9", "inferred": false, "deletedbyinference": false}, "qualifier": {"classid": "urn", "classname": "urn", "schemeid": "dnet:pid_types", "schemename": "dnet:pid_types"}, "value": "urn:nbn:nl:ui:13-q1-3218"}, {"dataInfo": {"invisible": false, "provenanceaction": {"classid": "sysimport:crosswalk:repository", "classname": "Harvested", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "trust": "0.9", "inferred": false, "deletedbyinference": false}, "qualifier": {"classid": "doi", "classname": "Digital Object Identifier", "schemeid": "dnet:pid_types", "schemename": "dnet:pid_types"}, "value": "10.17632/v6cgs4jpbk.2"}], "dateofacceptance": {"dataInfo": {"invisible": false, "provenanceaction": {"classid": "sysimport:crosswalk:repository", "classname": 
"Harvested", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "trust": "0.9", "inferred": false, "deletedbyinference": false}, "value": "2022-01-01"}, "collectedfrom": {"key": "10|re3data_____::84e123776089ce3c7a33db98d9cd15a8", "value": "EASY"}, "accessright": {"classid": "OPEN", "classname": "Open Access", "schemeid": "dnet:access_modes", "schemename": "dnet:access_modes"}, "instancetype": {"classid": "0021", "classname": "Dataset", "schemeid": "dnet:publication_resource", "schemename": "dnet:publication_resource"}}], "dateofcollection": "2022-08-17T20:06:53+0000", "fulltext": [], "dateoftransformation": "2022-08-17T20:06:53+0000", "description": [{"dataInfo": {"provenanceaction": {"classid": "sysimport:crosswalk:datasetarchive", "classname": "Harvested", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.9"}, "value": "Unprocessed images for: Acinetobacter baumannii defends against oxidative stress through a Mn2+-dependent small RNA-mediated repression of type VI secretion system"}], "format": [], "measures": [{"id": "influence", "unit": [{"dataInfo": {"provenanceaction": {"classid": "measure:bip", "classname": "Inferred by OpenAIRE", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "update", "invisible": false, "trust": ""}, "key": "score", "value": "4.842839E-9"}, {"dataInfo": {"provenanceaction": {"classid": "measure:bip", "classname": "Inferred by OpenAIRE", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "update", "invisible": false, "trust": ""}, "key": "class", "value": "C"}]}, {"id": "popularity", "unit": [{"dataInfo": {"provenanceaction": {"classid": "measure:bip", "classname": "Inferred by 
OpenAIRE", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "update", "invisible": false, "trust": ""}, "key": "score", "value": "7.705171E-9"}, {"dataInfo": {"provenanceaction": {"classid": "measure:bip", "classname": "Inferred by OpenAIRE", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "update", "invisible": false, "trust": ""}, "key": "class", "value": "C"}]}, {"id": "influence_alt", "unit": [{"dataInfo": {"provenanceaction": {"classid": "measure:bip", "classname": "Inferred by OpenAIRE", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "update", "invisible": false, "trust": ""}, "key": "score", "value": "0"}, {"dataInfo": {"provenanceaction": {"classid": "measure:bip", "classname": "Inferred by OpenAIRE", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "update", "invisible": false, "trust": ""}, "key": "class", "value": "C"}]}, {"id": "popularity_alt", "unit": [{"dataInfo": {"provenanceaction": {"classid": "measure:bip", "classname": "Inferred by OpenAIRE", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "update", "invisible": false, "trust": ""}, "key": "score", "value": "0.0"}, {"dataInfo": {"provenanceaction": {"classid": "measure:bip", "classname": "Inferred by OpenAIRE", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "update", "invisible": false, "trust": ""}, "key": "class", "value": "C"}]}, {"id": "impulse", "unit": [{"dataInfo": {"provenanceaction": 
{"classid": "measure:bip", "classname": "Inferred by OpenAIRE", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "update", "invisible": false, "trust": ""}, "key": "score", "value": "0"}, {"dataInfo": {"provenanceaction": {"classid": "measure:bip", "classname": "Inferred by OpenAIRE", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "update", "invisible": false, "trust": ""}, "key": "class", "value": "C"}]}], "coverage": [], "externalReference": [], "publisher": {"value": "Mendeley"}, "context": [], "eoscifguidelines": [], "language": {"classid": "und", "classname": "Undetermined", "schemeid": "dnet:languages", "schemename": "dnet:languages"}, "resulttype": {"classid": "dataset", "classname": "dataset", "schemeid": "dnet:result_typologies", "schemename": "dnet:result_typologies"}, "country": [{"classid": "NL", "classname": "Netherlands", "dataInfo": {"provenanceaction": {"classid": "country:instrepos", "classname": "Propagation of country to result collected from datasources of type institutional repositories", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "propagation", "invisible": false, "trust": "0.85"}, "schemeid": "dnet:countries", "schemename": "dnet:countries"}], "extraInfo": [], "originalId": ["50|datacite____::e04c8cbefb6f0b8378a04c57e6edbd82", "10.17632/v6cgs4jpbk.2", "50|datacite____::1544eab177367edbc4d9d56517d482df", "10.17632/v6cgs4jpbk.3", "50|datacite____::f02e8a3c923d8e120e1cdc5d2dcda3ff", "10.17632/v6cgs4jpbk", "50|datacite____::fce3b034f047504961bc4baab3515295", "10.17632/v6cgs4jpbk.1", "50|DansKnawCris::3b86948c475d0efbde049b72579feb50", "oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:254617", 
"50|dris___00893::3b86948c475d0efbde049b72579feb50", "oai:easy.dans.knaw.nl:easy-dataset:254617", "50|r384e1237760::673150ea2ae00b606fd8c39897dfa3d7"], "source": [], "dateofacceptance": {"value": "2022-08-16"}, "title": [{"qualifier": {"classid": "main title", "classname": "main title", "schemeid": "dnet:dataCite_title", "schemename": "dnet:dataCite_title"}, "value": "Unprocessed images for: Acinetobacter baumannii defends against oxidative stress through a Mn2+-dependent small RNA-mediated repression of type VI secretion system"}]} \ No newline at end of file From f37113a9419114d6481689a592341cef380c4d34 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 16 Dec 2022 15:32:26 +0100 Subject: [PATCH 50/55] [BulkTag] moving xquery to get community configuration in dedicated file --- .../community/QueryInformationSystem.java | 75 +++---------------- .../resources/eu/dnetlib/dhp/bulktag/query.xq | 58 ++++++++++++++ 2 files changed, 68 insertions(+), 65 deletions(-) diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/QueryInformationSystem.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/QueryInformationSystem.java index 678b270a0..5fe3cf81f 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/QueryInformationSystem.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/QueryInformationSystem.java @@ -1,8 +1,10 @@ package eu.dnetlib.dhp.bulktag.community; +import java.io.IOException; import java.util.List; +import org.apache.commons.io.IOUtils; import org.dom4j.DocumentException; import org.xml.sax.SAXException; @@ -13,74 +15,17 @@ import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; public class QueryInformationSystem { - private static final String XQUERY = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') " - + " let $subj := 
$x//CONFIGURATION/context/param[./@name='subject']/text() " - + " let $datasources := $x//CONFIGURATION/context/category[./@id=concat($x//CONFIGURATION/context/@id,'::contentproviders')]/concept " - + " let $organizations := $x//CONFIGURATION/context/category[./@id=concat($x//CONFIGURATION/context/@id,'::resultorganizations')]/concept " - + " let $communities := $x//CONFIGURATION/context/category[./@id=concat($x//CONFIGURATION/context/@id,'::zenodocommunities')]/concept " - + " let $fos := $x//CONFIGURATION/context/param[./@name='fos']/text() " - + " let $sdg := $x//CONFIGURATION/context/param[./@name='sdg']/text() " - + - "let $zenodo := $x//param[./@name='zenodoCommunity']/text() " - + " where $x//CONFIGURATION/context[./@type='community' or ./@type='ri'] and $x//context/param[./@name = 'status']/text() != 'hidden' " - + " return " - + " " - + " { $x//CONFIGURATION/context/@id} " - + " " + - "{$x//CONFIGURATION/context/param[./@name='advancedConstraints']/text() }" + - "" - + " " - + " {for $y in tokenize($subj,',') " - + " return " - + " {$y}} " - + " {for $y in tokenize($fos,',') " - + " return " - + " {$y}} " - + " {for $y in tokenize($sdg,',') " - + " return " - + " {$y}} " - + " " - + " " - + " {for $d in $datasources " - + " where $d/param[./@name='enabled']/text()='true' " - + " return " - + " " - + " " - + " {$d//param[./@name='openaireId']/text()} " - + " " - + " " - + " {$d/param[./@name='selcriteria']/text()} " - + " " - + " } " - + " " + - " " + - "{for $zc in $zenodo " + - "return " + - " " + - " " + - "{$zc} " + - " " + - "}" - + " {for $zc in $communities " - + " return " - + " " - + " " - + " {$zc/param[./@name='zenodoid']/text()} " - + " " - + " " - + " {$zc/param[./@name='selcriteria']/text()} " - + " " - + " } " - + " " - + "" - + "{$x//CONFIGURATION/context/param[./@name='advancedConstraint']/text()} " - + "" - + " "; public static CommunityConfiguration getCommunityConfiguration(final String isLookupUrl) - throws ISLookUpException, 
DocumentException, SAXException { + throws ISLookUpException, DocumentException, SAXException, IOException { ISLookUpService isLookUp = ISLookupClientFactory.getLookUpService(isLookupUrl); - final List res = isLookUp.quickSearchProfile(XQUERY); + final List res = isLookUp + .quickSearchProfile( + IOUtils + .toString( + QueryInformationSystem.class + .getResourceAsStream( + "/eu/dnetlib/dhp/bulktag/query.xq"))); final String xmlConf = "" + Joiner.on(" ").join(res) + ""; diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/query.xq b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/query.xq index e69de29bb..6fbd74c8f 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/query.xq +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/query.xq @@ -0,0 +1,58 @@ +for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') +let $subj := $x//CONFIGURATION/context/param[./@name='subject']/text() +let $datasources := $x//CONFIGURATION/context/category[./@id=concat($x//CONFIGURATION/context/@id,'::contentproviders')]/concept +let $organizations := $x//CONFIGURATION/context/category[./@id=concat($x//CONFIGURATION/context/@id,'::resultorganizations')]/concept +let $communities := $x//CONFIGURATION/context/category[./@id=concat($x//CONFIGURATION/context/@id,'::zenodocommunities')]/concept +let $fos := $x//CONFIGURATION/context/param[./@name='fos']/text() +let $sdg := $x//CONFIGURATION/context/param[./@name='sdg']/text() +let $zenodo := $x//param[./@name='zenodoCommunity']/text() +where $x//CONFIGURATION/context[./@type='community' or ./@type='ri'] and $x//context/param[./@name = 'status']/text() != 'hidden' +return + +{ $x//CONFIGURATION/context/@id} + +{$x//CONFIGURATION/context/param[./@name='advancedConstraints']/text() } + + + {for $y in tokenize($subj,',') + return + {$y}} + {for $y in tokenize($fos,',') + return + {$y}} + {for $y in tokenize($sdg,',') + 
return + {$y}} + + + {for $d in $datasources + where $d/param[./@name='enabled']/text()='true' + return + + + {$d//param[./@name='openaireId']/text()} + + + {$d/param[./@name='selcriteria']/text()} + + } + + +{for $zc in $zenodo +return + + +{$zc} + +} +{for $zc in $communities +return + + +{$zc/param[./@name='zenodoid']/text()} + + +{$zc/param[./@name='selcriteria']/text()} + +} + \ No newline at end of file From 6674cccb9478d9a4d9cef15aa239c3593f8f39f2 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 16 Dec 2022 15:33:20 +0100 Subject: [PATCH 51/55] [BulkTag] description of parameters more comprehensive for those who do not implement it --- .../eu/dnetlib/dhp/bulktag/input_bulkTag_parameters.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/input_bulkTag_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/input_bulkTag_parameters.json index a37d7d168..a8be7c32e 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/input_bulkTag_parameters.json +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/input_bulkTag_parameters.json @@ -38,13 +38,13 @@ { "paramName": "test", "paramLongName": "isTest", - "paramDescription": "true if the spark session is managed, false otherwise", + "paramDescription": "Parameter intended for testing purposes only. True if the reun is relatesd to a test and so the taggingConf parameter should be loaded", "paramRequired": false }, { "paramName": "tg", "paramLongName": "taggingConf", - "paramDescription": "true if the spark session is managed, false otherwise", + "paramDescription": "this parameter is intended for testing purposes only. It is a possible tagging configuration obtained via the XQUERY. 
Intended to be removed", "paramRequired": false } From 9cf0a98699d161d298db16641b3d3658e0ea3346 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Tue, 20 Dec 2022 10:17:33 +0100 Subject: [PATCH 52/55] [cleaning] set the common subject classid/name --- .../oaf/utils/GraphCleaningFunctions.java | 9 ++ .../dhp/oa/merge/AuthorMergerTest.java | 100 ------------------ .../oa/merge/publications_with_authors.json | 3 - 3 files changed, 9 insertions(+), 103 deletions(-) delete mode 100644 dhp-common/src/test/java/eu/dnetlib/dhp/oa/merge/AuthorMergerTest.java delete mode 100644 dhp-common/src/test/resources/eu/dnetlib/dhp/oa/merge/publications_with_authors.json diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java index 347d3eb20..b24daaa5d 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java @@ -23,6 +23,8 @@ import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.oaf.*; import me.xuender.unidecode.Unidecode; +import org.apache.spark.api.java.function.MapFunction; +import org.apache.spark.sql.Encoders; public class GraphCleaningFunctions extends CleaningFunctions { @@ -201,6 +203,13 @@ public class GraphCleaningFunctions extends CleaningFunctions { .filter(sp -> StringUtils.isNotBlank(sp.getValue())) .filter(sp -> Objects.nonNull(sp.getQualifier())) .filter(sp -> StringUtils.isNotBlank(sp.getQualifier().getClassid())) + .map(s -> { + if ("dnet:result_subject".equals(s.getQualifier().getClassid())) { + s.getQualifier().setClassid(ModelConstants.DNET_SUBJECT_TYPOLOGIES); + s.getQualifier().setClassname(ModelConstants.DNET_SUBJECT_TYPOLOGIES); + } + return s; + }) .map(GraphCleaningFunctions::cleanValue) .collect( Collectors diff --git 
a/dhp-common/src/test/java/eu/dnetlib/dhp/oa/merge/AuthorMergerTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/oa/merge/AuthorMergerTest.java deleted file mode 100644 index 3a7a41a1b..000000000 --- a/dhp-common/src/test/java/eu/dnetlib/dhp/oa/merge/AuthorMergerTest.java +++ /dev/null @@ -1,100 +0,0 @@ - -package eu.dnetlib.dhp.oa.merge; - -import java.io.BufferedReader; -import java.io.FileReader; -import java.io.IOException; -import java.nio.file.Paths; -import java.util.ArrayList; -import java.util.List; -import java.util.stream.Collectors; - -import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -import com.fasterxml.jackson.databind.ObjectMapper; - -import eu.dnetlib.dhp.schema.oaf.Author; -import eu.dnetlib.dhp.schema.oaf.Publication; -import eu.dnetlib.dhp.schema.oaf.StructuredProperty; -import eu.dnetlib.pace.util.MapDocumentUtil; -import scala.Tuple2; - -class AuthorMergerTest { - - private String publicationsBasePath; - - private List> authors; - - @BeforeEach - public void setUp() throws Exception { - - publicationsBasePath = Paths - .get(AuthorMergerTest.class.getResource("/eu/dnetlib/dhp/oa/merge").toURI()) - .toFile() - .getAbsolutePath(); - - authors = readSample(publicationsBasePath + "/publications_with_authors.json", Publication.class) - .stream() - .map(p -> p._2().getAuthor()) - .collect(Collectors.toList()); - - } - - @Test - void mergeTest() { // used in the dedup: threshold set to 0.95 - - for (List authors1 : authors) { - System.out.println("List " + (authors.indexOf(authors1) + 1)); - for (Author author : authors1) { - System.out.println(authorToString(author)); - } - } - - List merge = AuthorMerger.merge(authors); - - System.out.println("Merge "); - for (Author author : merge) { - System.out.println(authorToString(author)); - } - - Assertions.assertEquals(7, merge.size()); - - } - - public List> readSample(String path, Class clazz) { - List> res = new ArrayList<>(); - 
BufferedReader reader; - try { - reader = new BufferedReader(new FileReader(path)); - String line = reader.readLine(); - while (line != null) { - res - .add( - new Tuple2<>( - MapDocumentUtil.getJPathString("$.id", line), - new ObjectMapper().readValue(line, clazz))); - // read next line - line = reader.readLine(); - } - reader.close(); - } catch (IOException e) { - e.printStackTrace(); - } - - return res; - } - - public String authorToString(Author a) { - - String print = "Fullname = "; - print += a.getFullname() + " pid = ["; - if (a.getPid() != null) - for (StructuredProperty sp : a.getPid()) { - print += sp.toComparableString() + " "; - } - print += "]"; - return print; - } -} diff --git a/dhp-common/src/test/resources/eu/dnetlib/dhp/oa/merge/publications_with_authors.json b/dhp-common/src/test/resources/eu/dnetlib/dhp/oa/merge/publications_with_authors.json deleted file mode 100644 index 600181ba5..000000000 --- a/dhp-common/src/test/resources/eu/dnetlib/dhp/oa/merge/publications_with_authors.json +++ /dev/null @@ -1,3 +0,0 @@ -{ "journal":{ "dataInfo":null, "conferenceplace":null, "issnPrinted":"0009-9260", "issnOnline":null, "issnLinking":null, "ep":"636", "iss":null, "sp":"632", "vol":"55", "edition":null, "conferencedate":null, "name":"Clinical Radiology" }, "measures":null, "author":[ { "rank":null, "fullname":"KARL TURETSCHEK", "affiliation":null, "pid":null, "surname":"TURETSCHEK", "name":"KARL" }, { "rank":null, "fullname":"WOLFGANG EBNER", "affiliation":null, "pid":null, "surname":"EBNER", "name":"WOLFGANG" }, { "rank":null, "fullname":"DOMINIK FLEISCHMANN", "affiliation":null, "pid":null, "surname":"FLEISCHMANN", "name":"DOMINIK" }, { "rank":null, "fullname":"PATRICK WUNDERBALDINGER", "affiliation":null, "pid":null, "surname":"WUNDERBALDINGER", "name":"PATRICK" }, { "rank":null, "fullname":"LUDWIG ERLACHER", "affiliation":null, "pid":null, "surname":"ERLACHER", "name":"LUDWIG" }, { "rank":null, "fullname":"THOMAS ZONTSICH", "affiliation":null, 
"pid":null, "surname":"ZONTSICH", "name":"THOMAS" }, { "rank":null, "fullname":"ALEXANDER A. BANKIER", "affiliation":null, "pid":null, "surname":"BANKIER", "name":"ALEXANDER A." } ], "resulttype":{ "classid":"publication", "schemeid":"dnet:result_typologies", "schemename":"dnet:result_typologies", "classname":"publication"}, "title":[ { "qualifier":{ "classid":"main title", "schemeid":"dnet:dataCite_title", "schemename":"dnet:dataCite_title", "classname":"main title" }, "dataInfo":null, "value":"Early Pulmonary Involvement in Ankylosing Spondylitis: Assessment With Thin-section CT" } ], "relevantdate":[ { "qualifier":{ "classid":"created", "schemeid":"dnet:dataCite_date", "schemename":"dnet:dataCite_date", "classname":"created" }, "dataInfo":null, "value":"2002-09-19T13:54:50Z" } ], "dateofacceptance":{ "dataInfo":null, "value":"2002-09-19T13:54:50Z" }, "publisher":{ "dataInfo":null, "value":"Elsevier BV" }, "embargoenddate":null, "fulltext":null, "contributor":null, "resourcetype":{ "classid":"0001", "schemeid":"dnet:dataCite_resource", "schemename":"dnet:dataCite_resource", "classname":"0001"}, "coverage":null, "bestaccessright":null, "externalReference":null, "format":null, "description":[ ], "source":[ { "dataInfo":null, "value":"Crossref" } ], "subject":[ { "qualifier":{ "classid":"keywords", "schemeid":"dnet:subject_classification_typologies", "schemename":"dnet:subject_classification_typologies", "classname":"keywords" }, "dataInfo":null, "value":"Radiology Nuclear Medicine and imaging" }, { "qualifier":{ "classid":"keywords", "schemeid":"dnet:subject_classification_typologies", "schemename":"dnet:subject_classification_typologies", "classname":"keywords" }, "dataInfo":null, "value":"General Medicine" } ], "language":null, "instance":[ { "processingchargecurrency":null, "refereed":null, "instancetype":{ "classid":"0001", "schemeid":"dnet:publication_resource", "schemename":"dnet:publication_resource", "classname":"Article" }, "hostedby":null, 
"distributionlocation":null, "processingchargeamount":null, "license":{ "dataInfo":null, "value":"https://www.elsevier.com/tdm/userlicense/1.0/" }, "accessright":{ "classid":"RESTRICTED", "schemeid":"dnet:access_modes", "schemename":"dnet:access_modes", "classname":"Restricted" }, "dateofacceptance":{ "dataInfo":null, "value":"2002-09-19T13:54:50Z" }, "collectedfrom":{ "dataInfo":null, "value":"Crossref", "key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2" }, "url":[ "https://api.elsevier.com/content/article/PII:S0009926000904987?httpAccept=text/xml", "https://api.elsevier.com/content/article/PII:S0009926000904987?httpAccept=text/plain", "http://dx.doi.org/10.1053/crad.2000.0498" ] } ], "context":null, "country":null, "originalId":[ "S0009926000904987", "10.1053/crad.2000.0498" ], "pid":[ { "qualifier":{ "classid":"doi", "schemeid":"dnet:pid_types", "schemename":"dnet:pid_types", "classname":"doi" }, "dataInfo":null, "value":"10.1053/crad.2000.0498" } ], "dateofcollection":"2020-02-06T20:40:22Z", "dateoftransformation":null, "oaiprovenance":null, "extraInfo":null, "id":"50|doiboost____::994b7e47b9e225ab6d5e14841cb45a7f", "collectedfrom":[ { "dataInfo":null, "value":"Crossref", "key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2" } ], "dataInfo":{ "trust":"0.9", "invisible":false, "inferred":false, "deletedbyinference":false, "inferenceprovenance":null, "provenanceaction":{ "classid":"sysimport:actionset", "schemeid":"dnet:provenanceActions", "schemename":"dnet:provenanceActions", "classname":"sysimport:actionset" } }, "lastupdatetimestamp":1581021622595 } -{ "journal":null, "measures":null, "author":[ { "rank":null, "fullname":"Dominik Fleischmann", "affiliation":null, "pid":[ { "qualifier":{ "classid":"ORCID", "schemeid":"dnet:pid_types", "schemename":"dnet:pid_types", "classname":"ORCID" }, "dataInfo":{ "trust":"0.91", "invisible":false, "inferred":false, "deletedbyinference":false, "inferenceprovenance":null, "provenanceaction":{ 
"classid":"sysimport:crosswalk:entityregistry", "schemeid":"dnet:provenanceActions", "schemename":"dnet:provenanceActions", "classname":"Harvested"} }, "value":"0000-0003-0715-0952" } ], "surname":"Fleischmann", "name":"Dominik" } ], "resulttype":{ "classid":"publication", "schemeid":"dnet:result_typologies", "schemename":"dnet:result_typologies", "classname":"publication"}, "title":[ ], "relevantdate":[ ], "dateofacceptance":null, "publisher":null, "embargoenddate":null, "fulltext":[ ], "contributor":[ ], "resourcetype":null, "coverage":[ ], "bestaccessright":null, "externalReference":[ ], "format":[ ], "description":null, "source":[ ], "subject":[ ], "language":null, "instance":[ ], "context":[ ], "country":[ ], "originalId":[ ], "pid":[ { "qualifier":{ "classid":"doi", "schemeid":"dnet:pid_types", "schemename":"dnet:pid_types", "classname":"doi"}, "dataInfo":null, "value":"10.1053/crad.2000.0498" } ], "dateofcollection":null, "dateoftransformation":null, "oaiprovenance":null, "extraInfo":[ ], "id":"50|doiboost____::994b7e47b9e225ab6d5e14841cb45a7f", "collectedfrom":[ { "dataInfo":null, "value":"ORCID", "key":"10|openaire____::806360c771262b4d6770e7cdf04b5c5a" } ], "dataInfo":{ "trust":"0.9", "invisible":false, "inferred":false, "deletedbyinference":false, "inferenceprovenance":null, "provenanceaction":{ "classid":"sysimport:actionset", "schemeid":"dnet:provenanceActions", "schemename":"dnet:provenanceActions", "classname":"sysimport:actionset" } }, "lastupdatetimestamp":null } -{ "journal":{ "dataInfo":null, "conferenceplace":null, "issnPrinted":"0009-9260", "issnOnline":null, "issnLinking":null, "ep":"636", "iss":"8", "sp":"632", "vol":"55", "edition":null, "conferencedate":null, "name":"Clinical Radiology" }, "measures":null, "author":[ { "rank":null, "fullname":"T. 
Zontsich", "affiliation":[ { "dataInfo":null, "value":"University of Vienna" } ], "pid":[ { "qualifier":{ "classid":"URL", "schemeid":"dnet:pid_types", "schemename":"dnet:pid_types", "classname":"URL"}, "dataInfo":null, "value":"https://academic.microsoft.com/#/detail/1966908432" } ], "surname":null, "name":null }, { "rank":null, "fullname":"L Erlacher", "affiliation":[ { "dataInfo":null, "value":"University of Vienna" } ], "pid":[ { "qualifier":{ "classid":"URL", "schemeid":"dnet:pid_types", "schemename":"dnet:pid_types", "classname":"URL"}, "dataInfo":null, "value":"https://academic.microsoft.com/#/detail/687931320" } ], "surname":null, "name":null }, { "rank":null, "fullname":"Dominik Fleischmann", "affiliation":[ { "dataInfo":null, "value":"University of Vienna" } ], "pid":[ { "qualifier":{ "classid":"URL", "schemeid":"dnet:pid_types", "schemename":"dnet:pid_types", "classname":"URL"}, "dataInfo":null, "value":"https://academic.microsoft.com/#/detail/2156559961" } ], "surname":null, "name":null }, { "rank":null, "fullname":"Alexander A. 
Bankier", "affiliation":[ { "dataInfo":null, "value":"University of Vienna" } ], "pid":[ { "qualifier":{ "classid":"URL", "schemeid":"dnet:pid_types", "schemename":"dnet:pid_types", "classname":"URL"}, "dataInfo":null, "value":"https://academic.microsoft.com/#/detail/1107971609" } ], "surname":null, "name":null }, { "rank":null, "fullname":"Patrick Wunderbaldinger", "affiliation":[ { "dataInfo":null, "value":"University of Vienna" } ], "pid":[ { "qualifier":{ "classid":"URL", "schemeid":"dnet:pid_types", "schemename":"dnet:pid_types", "classname":"URL" }, "dataInfo":null, "value":"https://academic.microsoft.com/#/detail/2422340537" } ], "surname":null, "name":null }, { "rank":null, "fullname":"Wolfgang Ebner", "affiliation":null, "pid":[ { "qualifier":{ "classid":"URL", "schemeid":"dnet:pid_types", "schemename":"dnet:pid_types", "classname":"URL" }, "dataInfo":null, "value":"https://academic.microsoft.com/#/detail/2186462571" } ], "surname":null, "name":null }, { "rank":null, "fullname":"K. 
Turetschek", "affiliation":[ { "dataInfo":null, "value":"University of Vienna" } ], "pid":[ { "qualifier":{ "classid":"URL", "schemeid":"dnet:pid_types", "schemename":"dnet:pid_types", "classname":"URL" }, "dataInfo":null, "value":"https://academic.microsoft.com/#/detail/321765676" } ], "surname":null, "name":null } ], "resulttype":{ "classid":"publication", "schemeid":"dnet:result_typologies", "schemename":"dnet:result_typologies", "classname":"publication" }, "title":[ { "qualifier":{ "classid":"main title", "schemeid":"dnet:dataCite_title", "schemename":"dnet:dataCite_title", "classname":"main title" }, "dataInfo":null, "value":"early pulmonary involvement in ankylosing spondylitis assessment with thin section ct" }, { "qualifier":{ "classid":"alternative title", "schemeid":"dnet:dataCite_title", "schemename":"dnet:dataCite_title", "classname":"alternative title" }, "dataInfo":null, "value":"Early pulmonary involvement in ankylosing spondylitis: assessment with thin-section CT." } ], "relevantdate":null, "dateofacceptance":{ "dataInfo":null, "value":"2000-08-01" }, "publisher":{ "dataInfo":null, "value":"Elsevier" }, "embargoenddate":null, "fulltext":null, "contributor":null, "resourcetype":null, "coverage":null, "bestaccessright":null, "externalReference":null, "format":null, "description":[ { "dataInfo":null, "value":"Abstract AIM: To determine the frequency and the distribution of early pulmonary lesions in patients with ankylosing spondylitis (AS) and a normal chest X-ray on thin-section CT and to correlate the CT findings with the results of pulmonary function tests and clinical data. MATERIALS AND METHODS: Twenty-five patients with clinically proven AS and no history of smoking underwent clinical examinations, pulmonary function tests (PFT), chest radiography, and thin-section CT. Four of 25 patients (16%), who had obvious signs on plain films suggestive of pre-existing disorders unrelated to AS were excluded. 
RESULTS: Fifteen of 21 patients (71%) had abnormalities on thin-section CT. The most frequent abnormalities were thickening of the interlobular septa in seven of 21 patients (33%), mild bronchial wall thickening in (6/21, 29%), pleural thickening and pleuropulmonary irregularities (both 29%) and linear septal thickening (6/21, 29%). In six patients there were no signs of pleuropulmonary involvement. Eight of 15 patients (53%) with abnormal and four of six patients (67%) with normal CT findings revealed mild restrictive lung function impairment. CONCLUSION: Patients with AS but a normal chest radiograph frequently have abnormalities on thin-section CT. As these abnormalities are usually subtle and their extent does not correlate with functional and clinical data, the overall routine impact of thin-section CT in the diagnosis of AS is limited. Turetschek, K , (2000) Clinical Radiology53, 632–636." } ], "source":[ { "dataInfo":null, "value":null } ], "subject":[ { "qualifier":{ "classid":"MAG", "schemeid":"dnet:subject_classification_typologies", "schemename":"dnet:subject_classification_typologies", "classname":"Microsoft Academic Graph classification" }, "dataInfo":null, "value":"Complication" }, { "qualifier":{ "classid":"MAG", "schemeid":"dnet:subject_classification_typologies", "schemename":"dnet:subject_classification_typologies", "classname":"Microsoft Academic Graph classification" }, "dataInfo":null, "value":"Chest radiograph" }, { "qualifier":{ "classid":"MAG", "schemeid":"dnet:subject_classification_typologies", "schemename":"dnet:subject_classification_typologies", "classname":"Microsoft Academic Graph classification" }, "dataInfo":{ "trust":"0.580897", "invisible":false, "inferred":false, "deletedbyinference":false, "inferenceprovenance":null, "provenanceaction":{ "classid":"sysimport:actionset", "schemeid":"dnet:provenanceActions", "schemename":"dnet:provenanceActions", "classname":"sysimport:actionset" } }, "value":"medicine.diagnostic_test" }, { 
"qualifier":{ "classid":"MAG", "schemeid":"dnet:subject_classification_typologies", "schemename":"dnet:subject_classification_typologies", "classname":"Microsoft Academic Graph classification" }, "dataInfo":{ "trust":"0.580897", "invisible":false, "inferred":false, "deletedbyinference":false, "inferenceprovenance":null, "provenanceaction":{ "classid":"sysimport:actionset", "schemeid":"dnet:provenanceActions", "schemename":"dnet:provenanceActions", "classname":"sysimport:actionset" } }, "value":"medicine" }, { "qualifier":{ "classid":"MAG", "schemeid":"dnet:subject_classification_typologies", "schemename":"dnet:subject_classification_typologies", "classname":"Microsoft Academic Graph classification" }, "dataInfo":null, "value":"In patient" }, { "qualifier":{ "classid":"MAG", "schemeid":"dnet:subject_classification_typologies", "schemename":"dnet:subject_classification_typologies", "classname":"Microsoft Academic Graph classification" }, "dataInfo":null, "value":"Radiography" }, { "qualifier":{ "classid":"MAG", "schemeid":"dnet:subject_classification_typologies", "schemename":"dnet:subject_classification_typologies", "classname":"Microsoft Academic Graph classification" }, "dataInfo":{ "trust":"0.4582326", "invisible":false, "inferred":false, "deletedbyinference":false, "inferenceprovenance":null, "provenanceaction":{ "classid":"sysimport:actionset", "schemeid":"dnet:provenanceActions", "schemename":"dnet:provenanceActions", "classname":"sysimport:actionset" } }, "value":"business.industry" }, { "qualifier":{ "classid":"MAG", "schemeid":"dnet:subject_classification_typologies", "schemename":"dnet:subject_classification_typologies", "classname":"Microsoft Academic Graph classification" }, "dataInfo":{ "trust":"0.4582326", "invisible":false, "inferred":false, "deletedbyinference":false, "inferenceprovenance":null, "provenanceaction":{ "classid":"sysimport:actionset", "schemeid":"dnet:provenanceActions", "schemename":"dnet:provenanceActions", 
"classname":"sysimport:actionset" } }, "value":"business" }, { "qualifier":{ "classid":"MAG", "schemeid":"dnet:subject_classification_typologies", "schemename":"dnet:subject_classification_typologies", "classname":"Microsoft Academic Graph classification" }, "dataInfo":null, "value":"Thin section ct" }, { "qualifier":{ "classid":"MAG", "schemeid":"dnet:subject_classification_typologies", "schemename":"dnet:subject_classification_typologies", "classname":"Microsoft Academic Graph classification" }, "dataInfo":null, "value":"Respiratory disease" }, { "qualifier":{ "classid":"MAG", "schemeid":"dnet:subject_classification_typologies", "schemename":"dnet:subject_classification_typologies", "classname":"Microsoft Academic Graph classification" }, "dataInfo":{ "trust":"0.49358836", "invisible":false, "inferred":false, "deletedbyinference":false, "inferenceprovenance":null, "provenanceaction":{ "classid":"sysimport:actionset", "schemeid":"dnet:provenanceActions", "schemename":"dnet:provenanceActions", "classname":"sysimport:actionset" } }, "value":"medicine.disease" }, { "qualifier":{ "classid":"MAG", "schemeid":"dnet:subject_classification_typologies", "schemename":"dnet:subject_classification_typologies", "classname":"Microsoft Academic Graph classification" }, "dataInfo":{ "trust":"0.49358836", "invisible":false, "inferred":false, "deletedbyinference":false, "inferenceprovenance":null, "provenanceaction":{ "classid":"sysimport:actionset", "schemeid":"dnet:provenanceActions", "schemename":"dnet:provenanceActions", "classname":"sysimport:actionset" } }, "value":"medicine" }, { "qualifier":{ "classid":"MAG", "schemeid":"dnet:subject_classification_typologies", "schemename":"dnet:subject_classification_typologies", "classname":"Microsoft Academic Graph classification" }, "dataInfo":null, "value":"Ankylosing spondylitis" }, { "qualifier":{ "classid":"MAG", "schemeid":"dnet:subject_classification_typologies", "schemename":"dnet:subject_classification_typologies", 
"classname":"Microsoft Academic Graph classification" }, "dataInfo":{ "trust":"0.49937168", "invisible":false, "inferred":false, "deletedbyinference":false, "inferenceprovenance":null, "provenanceaction":{ "classid":"sysimport:actionset", "schemeid":"dnet:provenanceActions", "schemename":"dnet:provenanceActions", "classname":"sysimport:actionset" } }, "value":"medicine.disease" }, { "qualifier":{ "classid":"MAG", "schemeid":"dnet:subject_classification_typologies", "schemename":"dnet:subject_classification_typologies", "classname":"Microsoft Academic Graph classification" }, "dataInfo":{ "trust":"0.49937168", "invisible":false, "inferred":false, "deletedbyinference":false, "inferenceprovenance":null, "provenanceaction":{ "classid":"sysimport:actionset", "schemeid":"dnet:provenanceActions", "schemename":"dnet:provenanceActions", "classname":"sysimport:actionset" } }, "value":"medicine" }, { "qualifier":{ "classid":"MAG", "schemeid":"dnet:subject_classification_typologies", "schemename":"dnet:subject_classification_typologies", "classname":"Microsoft Academic Graph classification" }, "dataInfo":null, "value":"Radiology" }, { "qualifier":{ "classid":"MAG", "schemeid":"dnet:subject_classification_typologies", "schemename":"dnet:subject_classification_typologies", "classname":"Microsoft Academic Graph classification" }, "dataInfo":{ "trust":"0.4573571", "invisible":false, "inferred":false, "deletedbyinference":false, "inferenceprovenance":null, "provenanceaction":{ "classid":"sysimport:actionset", "schemeid":"dnet:provenanceActions", "schemename":"dnet:provenanceActions", "classname":"sysimport:actionset" } }, "value":"medicine.medical_specialty" }, { "qualifier":{ "classid":"MAG", "schemeid":"dnet:subject_classification_typologies", "schemename":"dnet:subject_classification_typologies", "classname":"Microsoft Academic Graph classification" }, "dataInfo":{ "trust":"0.4573571", "invisible":false, "inferred":false, "deletedbyinference":false, "inferenceprovenance":null, 
"provenanceaction":{ "classid":"sysimport:actionset", "schemeid":"dnet:provenanceActions", "schemename":"dnet:provenanceActions", "classname":"sysimport:actionset" } }, "value":"medicine" }, { "qualifier":{ "classid":"MAG", "schemeid":"dnet:subject_classification_typologies", "schemename":"dnet:subject_classification_typologies", "classname":"Microsoft Academic Graph classification" }, "dataInfo":null, "value":"Medicine" }, { "qualifier":{ "classid":"MAG", "schemeid":"dnet:subject_classification_typologies", "schemename":"dnet:subject_classification_typologies", "classname":"Microsoft Academic Graph classification" }, "dataInfo":{ "trust":"0.40295774", "invisible":false, "inferred":false, "deletedbyinference":false, "inferenceprovenance":null, "provenanceaction":{ "classid":"sysimport:actionset", "schemeid":"dnet:provenanceActions", "schemename":"dnet:provenanceActions", "classname":"sysimport:actionset" } }, "value":"business.industry" }, { "qualifier":{ "classid":"MAG", "schemeid":"dnet:subject_classification_typologies", "schemename":"dnet:subject_classification_typologies", "classname":"Microsoft Academic Graph classification" }, "dataInfo":{ "trust":"0.40295774", "invisible":false, "inferred":false, "deletedbyinference":false, "inferenceprovenance":null, "provenanceaction":{ "classid":"sysimport:actionset", "schemeid":"dnet:provenanceActions", "schemename":"dnet:provenanceActions", "classname":"sysimport:actionset" } }, "value":"business" }, { "qualifier":{ "classid":"MAG", "schemeid":"dnet:subject_classification_typologies", "schemename":"dnet:subject_classification_typologies", "classname":"Microsoft Academic Graph classification" }, "dataInfo":null, "value":"Pulmonary function testing" } ], "language":null, "instance":[ { "processingchargecurrency":null, "refereed":null, "instancetype":null, "hostedby":null, "distributionlocation":null, "processingchargeamount":null, "license":null, "accessright":null, "dateofacceptance":null, "collectedfrom":{ 
"dataInfo":null, "value":"Microsoft Academic Graph", "key":"10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a" }, "url":[ "https://www.ncbi.nlm.nih.gov/pubmed/10964736", "https://www.sciencedirect.com/science/article/pii/S0009926000904987", "https://academic.microsoft.com/#/detail/1990704599" ] } ], "context":null, "country":null, "originalId":[ "1990704599", "10.1053/crad.2000.0498" ], "pid":[ { "qualifier":{ "classid":"doi", "schemeid":"dnet:pid_types", "schemename":"dnet:pid_types", "classname":"doi" }, "dataInfo":null, "value":"10.1053/crad.2000.0498" } ], "dateofcollection":null, "dateoftransformation":null, "oaiprovenance":null, "extraInfo":null, "id":"50|doiboost____::994b7e47b9e225ab6d5e14841cb45a7f", "collectedfrom":[ { "dataInfo":null, "value":"Microsoft Academic Graph", "key":"10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a" } ], "dataInfo":{ "trust":"0.9", "invisible":false, "inferred":false, "deletedbyinference":false, "inferenceprovenance":null, "provenanceaction":{ "classid":"sysimport:actionset", "schemeid":"dnet:provenanceActions", "schemename":"dnet:provenanceActions", "classname":"sysimport:actionset"} }, "lastupdatetimestamp":null } \ No newline at end of file From 6aa91204a5950ae8b26844760e875833315c6388 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Tue, 20 Dec 2022 14:15:46 +0100 Subject: [PATCH 53/55] [orcid propagation] skip empty directories --- .../java/eu/dnetlib/dhp/PropagationConstant.java | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java index 02fdcb09b..36361a09e 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java @@ -228,10 +228,15 @@ public class PropagationConstant { public static Dataset readPath( 
SparkSession spark, String inputPath, Class clazz) { - return spark - .read() - .textFile(inputPath) - .map((MapFunction) value -> OBJECT_MAPPER.readValue(value, clazz), Encoders.bean(clazz)); + + if (HdfsSupport.exists(inputPath, spark.sparkContext().hadoopConfiguration())) { + return spark + .read() + .textFile(inputPath) + .map((MapFunction) value -> OBJECT_MAPPER.readValue(value, clazz), Encoders.bean(clazz)); + } else { + return spark.emptyDataset(Encoders.bean(clazz)); + } } } From 3c9826f18620eba9c3500831680f7275c76395bb Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Wed, 21 Dec 2022 11:21:17 +0100 Subject: [PATCH 54/55] updated lines function to it's implementation linesWithSeparators.map(l => l.stripLineEnd) in this way we force scala plugin compiler to consider this pipeline scala code and not java.string.lines() pipeline --- .../bio/ebi/SparkCreateBaselineDataFrame.scala | 2 +- .../eu/dnetlib/dhp/sx/bio/BioScholixTest.scala | 18 +++++++++--------- .../crossref/CrossrefMappingTest.scala | 4 ++-- .../doiboost/orcid/MappingORCIDToOAFTest.scala | 2 +- .../dhp/doiboost/uw/UnpayWallMappingTest.scala | 4 ++-- .../graph/resolution/ResolveEntitiesTest.scala | 6 +++--- .../sx/graph/scholix/ScholixGraphTest.scala | 4 ++-- 7 files changed, 20 insertions(+), 20 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkCreateBaselineDataFrame.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkCreateBaselineDataFrame.scala index 9c55ec7be..87116f00a 100644 --- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkCreateBaselineDataFrame.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkCreateBaselineDataFrame.scala @@ -27,7 +27,7 @@ object SparkCreateBaselineDataFrame { def requestBaseLineUpdatePage(maxFile: String): List[(String, String)] = { val data = requestPage("https://ftp.ncbi.nlm.nih.gov/pubmed/updatefiles/") - val 
result = data.lines + val result = data.linesWithSeparators.map(l =>l.stripLineEnd) .filter(l => l.startsWith("") diff --git a/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/sx/bio/BioScholixTest.scala b/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/sx/bio/BioScholixTest.scala index 827d23e72..24caaa553 100644 --- a/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/sx/bio/BioScholixTest.scala +++ b/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/sx/bio/BioScholixTest.scala @@ -63,7 +63,7 @@ class BioScholixTest extends AbstractVocabularyTest { val records: String = Source .fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/bio/pubmed_dump")) .mkString - val r: List[Oaf] = records.lines.toList + val r: List[Oaf] = records.linesWithSeparators.map(l =>l.stripLineEnd).toList .map(s => mapper.readValue(s, classOf[PMArticle])) .map(a => PubMedToOaf.convert(a, vocabularies)) assertEquals(10, r.size) @@ -173,9 +173,9 @@ class BioScholixTest extends AbstractVocabularyTest { val records: String = Source .fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/bio/pdb_dump")) .mkString - records.lines.foreach(s => assertTrue(s.nonEmpty)) + records.linesWithSeparators.map(l =>l.stripLineEnd).foreach(s => assertTrue(s.nonEmpty)) - val result: List[Oaf] = records.lines.toList.flatMap(o => BioDBToOAF.pdbTOOaf(o)) + val result: List[Oaf] = records.linesWithSeparators.map(l =>l.stripLineEnd).toList.flatMap(o => BioDBToOAF.pdbTOOaf(o)) assertTrue(result.nonEmpty) result.foreach(r => assertNotNull(r)) @@ -194,9 +194,9 @@ class BioScholixTest extends AbstractVocabularyTest { val records: String = Source .fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/bio/uniprot_dump")) .mkString - records.lines.foreach(s => assertTrue(s.nonEmpty)) + records.linesWithSeparators.map(l =>l.stripLineEnd).foreach(s => assertTrue(s.nonEmpty)) - val result: List[Oaf] = records.lines.toList.flatMap(o 
=> BioDBToOAF.uniprotToOAF(o)) + val result: List[Oaf] = records.linesWithSeparators.map(l =>l.stripLineEnd).toList.flatMap(o => BioDBToOAF.uniprotToOAF(o)) assertTrue(result.nonEmpty) result.foreach(r => assertNotNull(r)) @@ -239,9 +239,9 @@ class BioScholixTest extends AbstractVocabularyTest { val records: String = Source .fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/bio/crossref_links")) .mkString - records.lines.foreach(s => assertTrue(s.nonEmpty)) + records.linesWithSeparators.map(l =>l.stripLineEnd).foreach(s => assertTrue(s.nonEmpty)) - val result: List[Oaf] = records.lines.map(s => BioDBToOAF.crossrefLinksToOaf(s)).toList + val result: List[Oaf] = records.linesWithSeparators.map(l =>l.stripLineEnd).map(s => BioDBToOAF.crossrefLinksToOaf(s)).toList assertNotNull(result) assertTrue(result.nonEmpty) @@ -276,11 +276,11 @@ class BioScholixTest extends AbstractVocabularyTest { getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/bio/scholix_resolved") ) .mkString - records.lines.foreach(s => assertTrue(s.nonEmpty)) + records.linesWithSeparators.map(l =>l.stripLineEnd).foreach(s => assertTrue(s.nonEmpty)) implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats - val l: List[ScholixResolved] = records.lines.map { input => + val l: List[ScholixResolved] = records.linesWithSeparators.map(l =>l.stripLineEnd).map { input => lazy val json = parse(input) json.extract[ScholixResolved] }.toList diff --git a/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/crossref/CrossrefMappingTest.scala b/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/crossref/CrossrefMappingTest.scala index 98b5b708e..aba8cee12 100644 --- a/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/crossref/CrossrefMappingTest.scala +++ b/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/crossref/CrossrefMappingTest.scala @@ -31,13 +31,13 @@ class CrossrefMappingTest { 
.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/funder_doi")) .mkString - for (line <- funder_doi.lines) { + for (line <- funder_doi.linesWithSeparators.map(l =>l.stripLineEnd)) { val json = template.replace("%s", line) val resultList: List[Oaf] = Crossref2Oaf.convert(json) assertTrue(resultList.nonEmpty) checkRelation(resultList) } - for (line <- funder_name.lines) { + for (line <- funder_name.linesWithSeparators.map(l =>l.stripLineEnd)) { val json = template.replace("%s", line) val resultList: List[Oaf] = Crossref2Oaf.convert(json) assertTrue(resultList.nonEmpty) diff --git a/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/orcid/MappingORCIDToOAFTest.scala b/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/orcid/MappingORCIDToOAFTest.scala index e5bf1bd5f..d7a6a94a5 100644 --- a/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/orcid/MappingORCIDToOAFTest.scala +++ b/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/orcid/MappingORCIDToOAFTest.scala @@ -25,7 +25,7 @@ class MappingORCIDToOAFTest { .mkString assertNotNull(json) assertFalse(json.isEmpty) - json.lines.foreach(s => { + json.linesWithSeparators.map(l =>l.stripLineEnd).foreach(s => { assertNotNull(ORCIDToOAF.extractValueFromInputString(s)) }) } diff --git a/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/uw/UnpayWallMappingTest.scala b/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/uw/UnpayWallMappingTest.scala index 542faa8ad..7fe0e9935 100644 --- a/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/uw/UnpayWallMappingTest.scala +++ b/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/uw/UnpayWallMappingTest.scala @@ -22,7 +22,7 @@ class UnpayWallMappingTest { .mkString var i: Int = 0 - for (line <- Ilist.lines) { + for (line <- Ilist.linesWithSeparators.map(l =>l.stripLineEnd)) { val p = UnpayWallToOAF.convertToOAF(line) if (p != null) { @@ 
-43,7 +43,7 @@ class UnpayWallMappingTest { i = i + 1 } - val l = Ilist.lines.next() + val l = Ilist.linesWithSeparators.map(l =>l.stripLineEnd).next() val item = UnpayWallToOAF.convertToOAF(l) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/resolution/ResolveEntitiesTest.scala b/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/resolution/ResolveEntitiesTest.scala index e333da1aa..d415b7fc9 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/resolution/ResolveEntitiesTest.scala +++ b/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/resolution/ResolveEntitiesTest.scala @@ -53,7 +53,7 @@ class ResolveEntitiesTest extends Serializable { def generateUpdates(spark: SparkSession): Unit = { val template = Source.fromInputStream(this.getClass.getResourceAsStream("updates")).mkString - val pids: List[String] = template.lines + val pids: List[String] = template.linesWithSeparators.map(l =>l.stripLineEnd) .map { id => val r = new Result r.setId(id.toLowerCase.trim) @@ -127,7 +127,7 @@ class ResolveEntitiesTest extends Serializable { entities.foreach { e => val template = Source.fromInputStream(this.getClass.getResourceAsStream(s"$e")).mkString spark - .createDataset(spark.sparkContext.parallelize(template.lines.toList)) + .createDataset(spark.sparkContext.parallelize(template.linesWithSeparators.map(l =>l.stripLineEnd).toList)) .as[String] .write .option("compression", "gzip") @@ -264,7 +264,7 @@ class ResolveEntitiesTest extends Serializable { Source .fromInputStream(this.getClass.getResourceAsStream(s"publication")) .mkString - .lines + .linesWithSeparators.map(l =>l.stripLineEnd) .next(), classOf[Publication] ) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/sx/graph/scholix/ScholixGraphTest.scala b/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/sx/graph/scholix/ScholixGraphTest.scala index e92f36896..0ea908290 100644 --- 
a/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/sx/graph/scholix/ScholixGraphTest.scala +++ b/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/sx/graph/scholix/ScholixGraphTest.scala @@ -47,7 +47,7 @@ class ScholixGraphTest extends AbstractVocabularyTest { val inputRelations = Source .fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/oaf_to_summary")) .mkString - val items = inputRelations.lines.toList + val items = inputRelations.linesWithSeparators.map(l =>l.stripLineEnd).toList assertNotNull(items) items.foreach(i => assertTrue(i.nonEmpty)) val result = @@ -69,7 +69,7 @@ class ScholixGraphTest extends AbstractVocabularyTest { getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/merge_result_scholix") ) .mkString - val result: List[(Relation, ScholixSummary)] = inputRelations.lines + val result: List[(Relation, ScholixSummary)] = inputRelations.linesWithSeparators.map(l =>l.stripLineEnd) .sliding(2) .map(s => (s.head, s(1))) .map(p => (mapper.readValue(p._1, classOf[Relation]), mapper.readValue(p._2, classOf[ScholixSummary]))) From 7b7520850bca9d9707cfb8e04331292d0fcbeb86 Mon Sep 17 00:00:00 2001 From: "michele.artini" Date: Wed, 11 Jan 2023 08:35:53 +0100 Subject: [PATCH 55/55] fixed an invalid char --- .../test/resources/eu/dnetlib/dhp/transform/cnr_explora_tr.xslt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/cnr_explora_tr.xslt b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/cnr_explora_tr.xslt index 78b167fde..33770ce47 100644 --- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/cnr_explora_tr.xslt +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/cnr_explora_tr.xslt @@ -130,7 +130,7 @@ - +