From 62c4c3ed29dcf395b2f18c5c6495f6502f011c65 Mon Sep 17 00:00:00 2001 From: miconis Date: Wed, 9 Oct 2024 12:26:03 +0200 Subject: [PATCH 01/73] implementation of new comparators for organization and dataset disambiguation --- .../java/eu/dnetlib/pace/tree/CodeMatch.java | 2 +- .../eu/dnetlib/pace/tree/JsonListMatch.java | 43 +++++++++++++------ .../pace/comparators/ComparatorTest.java | 34 ++++++++++++--- 3 files changed, 60 insertions(+), 19 deletions(-) diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/CodeMatch.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/CodeMatch.java index 25a12bcdf..5cd21adcd 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/CodeMatch.java +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/CodeMatch.java @@ -21,7 +21,7 @@ public class CodeMatch extends AbstractStringComparator { public CodeMatch(Map params) { super(params); this.params = params; - this.CODE_REGEX = Pattern.compile(params.getOrDefault("codeRegex", "[a-zA-Z]::\\d+")); + this.CODE_REGEX = Pattern.compile(params.getOrDefault("codeRegex", "[a-zA-Z]+::\\d+")); } public Set getRegexList(String input) { diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/JsonListMatch.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/JsonListMatch.java index 3897e37f8..e95d9206e 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/JsonListMatch.java +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/JsonListMatch.java @@ -41,21 +41,38 @@ public class JsonListMatch extends AbstractListComparator { return -1; } - final Set ca = sa.stream().map(this::toComparableString).collect(Collectors.toSet()); - final Set cb = sb.stream().map(this::toComparableString).collect(Collectors.toSet()); + Set ca = sa.stream().map(this::toComparableString).collect(Collectors.toSet()); + Set cb = sb.stream().map(this::toComparableString).collect(Collectors.toSet()); - int incommon = Sets.intersection(ca, cb).size(); - int simDiff = Sets.symmetricDifference(ca, cb).size(); + switch (MODE) { + case "count": + return Sets.intersection(ca, cb).size(); - if (incommon + simDiff == 0) { - return 0.0; + case "percentage": + int incommon = Sets.intersection(ca, cb).size(); + int simDiff = Sets.symmetricDifference(ca, cb).size(); + if (incommon + simDiff == 0) { + return 0.0; + } + return (double) incommon / (incommon + simDiff); + + case "type": + Set typesA = ca.stream().map(s -> s.split("::")[0]).collect(Collectors.toSet()); + Set typesB = cb.stream().map(s -> s.split("::")[0]).collect(Collectors.toSet()); + + Set types = Sets.intersection(typesA, typesB); + + if (types.isEmpty()) // if no common type, it is impossible to compare + return -1; + + ca = ca.stream().filter(s -> types.contains(s.split("::")[0])).collect(Collectors.toSet()); + cb = cb.stream().filter(s -> types.contains(s.split("::")[0])).collect(Collectors.toSet()); + + return (double) Sets.intersection(ca, cb).size() / types.size(); + + default: + return -1; } - - if (MODE.equals("percentage")) - return (double) incommon / (incommon + simDiff); - else - return incommon; - } // converts every json into a comparable string basing on parameters @@ -69,7 +86,7 @@ public class JsonListMatch extends AbstractListComparator { // for each path in the param list for (String key : params.keySet().stream().filter(k -> k.contains("jpath")).collect(Collectors.toList())) { String path = params.get(key); - String value = MapDocumentUtil.getJPathString(path, documentContext); + String value = MapDocumentUtil.getJPathString(path, 
documentContext).toLowerCase(); if (value == null || value.isEmpty()) value = ""; st.append(value); diff --git a/dhp-pace-core/src/test/java/eu/dnetlib/pace/comparators/ComparatorTest.java b/dhp-pace-core/src/test/java/eu/dnetlib/pace/comparators/ComparatorTest.java index c008902c4..635f1fa44 100644 --- a/dhp-pace-core/src/test/java/eu/dnetlib/pace/comparators/ComparatorTest.java +++ b/dhp-pace-core/src/test/java/eu/dnetlib/pace/comparators/ComparatorTest.java @@ -65,6 +65,23 @@ public class ComparatorTest extends AbstractPaceTest { } + @Test + public void datasetVersionCodeMatchTest() { + + params.put("codeRegex", "(?=[\\w-]*[a-zA-Z])(?=[\\w-]*\\d)[\\w-]+"); + CodeMatch codeMatch = new CodeMatch(params); + + // names have different codes + assertEquals(0.0, codeMatch.distance("physical oceanography at ctd station june 1998 ev02a", "physical oceanography at ctd station june 1998 ir02", conf)); + + // names have same code + assertEquals(1.0, codeMatch.distance("physical oceanography at ctd station june 1998 ev02a", "physical oceanography at ctd station june 1998 ev02a", conf)); + + // code is not in both names + assertEquals(-1, codeMatch.distance("physical oceanography at ctd station june 1998", "physical oceanography at ctd station june 1998 ev02a", conf)); + assertEquals(1.0, codeMatch.distance("physical oceanography at ctd station june 1998", "physical oceanography at ctd station june 1998", conf)); + } + @Test public void listContainsMatchTest() { @@ -257,15 +274,15 @@ public class ComparatorTest extends AbstractPaceTest { List a = createFieldList( Arrays .asList( - "{\"datainfo\":{\"deletedbyinference\":false,\"inferenceprovenance\":null,\"inferred\":false,\"invisible\":false,\"provenanceaction\":{\"classid\":\"sysimport:actionset\",\"classname\":\"Harvested\",\"schemeid\":\"dnet:provenanceActions\",\"schemename\":\"dnet:provenanceActions\"},\"trust\":\"0.9\"},\"qualifier\":{\"classid\":\"doi\",\"classname\":\"Digital Object Identifier\",\"schemeid\":\"dnet:pid_types\",\"schemename\":\"dnet:pid_types\"},\"value\":\"10.1111/pbi.12655\"}"), + "{\"datainfo\":{\"deletedbyinference\":false,\"inferenceprovenance\":null,\"inferred\":false,\"invisible\":false,\"provenanceaction\":{\"classid\":\"sysimport:actionset\",\"classname\":\"Harvested\",\"schemeid\":\"dnet:provenanceActions\",\"schemename\":\"dnet:provenanceActions\"},\"trust\":\"0.9\"},\"qualifier\":{\"classid\":\"grid\",\"classname\":\"GRID Identifier\",\"schemeid\":\"dnet:pid_types\",\"schemename\":\"dnet:pid_types\"},\"value\":\"grid_1\"}", + "{\"datainfo\":{\"deletedbyinference\":false,\"inferenceprovenance\":null,\"inferred\":false,\"invisible\":false,\"provenanceaction\":{\"classid\":\"sysimport:actionset\",\"classname\":\"Harvested\",\"schemeid\":\"dnet:provenanceActions\",\"schemename\":\"dnet:provenanceActions\"},\"trust\":\"0.9\"},\"qualifier\":{\"classid\":\"ror\",\"classname\":\"Research Organization Registry\",\"schemeid\":\"dnet:pid_types\",\"schemename\":\"dnet:pid_types\"},\"value\":\"ror_1\"}"), "authors"); List b = createFieldList( Arrays .asList( - "{\"datainfo\":{\"deletedbyinference\":false,\"inferenceprovenance\":\"\",\"inferred\":false,\"invisible\":false,\"provenanceaction\":{\"classid\":\"sysimport:crosswalk:repository\",\"classname\":\"Harvested\",\"schemeid\":\"dnet:provenanceActions\",\"schemename\":\"dnet:provenanceActions\"},\"trust\":\"0.9\"},\"qualifier\":{\"classid\":\"pmc\",\"classname\":\"PubMed Central 
ID\",\"schemeid\":\"dnet:pid_types\",\"schemename\":\"dnet:pid_types\"},\"value\":\"PMC5399005\"}", - "{\"datainfo\":{\"deletedbyinference\":false,\"inferenceprovenance\":\"\",\"inferred\":false,\"invisible\":false,\"provenanceaction\":{\"classid\":\"sysimport:crosswalk:repository\",\"classname\":\"Harvested\",\"schemeid\":\"dnet:provenanceActions\",\"schemename\":\"dnet:provenanceActions\"},\"trust\":\"0.9\"},\"qualifier\":{\"classid\":\"pmid\",\"classname\":\"PubMed ID\",\"schemeid\":\"dnet:pid_types\",\"schemename\":\"dnet:pid_types\"},\"value\":\"27775869\"}", - "{\"datainfo\":{\"deletedbyinference\":false,\"inferenceprovenance\":\"\",\"inferred\":false,\"invisible\":false,\"provenanceaction\":{\"classid\":\"user:claim\",\"classname\":\"Linked by user\",\"schemeid\":\"dnet:provenanceActions\",\"schemename\":\"dnet:provenanceActions\"},\"trust\":\"0.9\"},\"qualifier\":{\"classid\":\"doi\",\"classname\":\"Digital Object Identifier\",\"schemeid\":\"dnet:pid_types\",\"schemename\":\"dnet:pid_types\"},\"value\":\"10.1111/pbi.12655\"}", - "{\"datainfo\":{\"deletedbyinference\":false,\"inferenceprovenance\":\"\",\"inferred\":false,\"invisible\":false,\"provenanceaction\":{\"classid\":\"sysimport:crosswalk:repository\",\"classname\":\"Harvested\",\"schemeid\":\"dnet:provenanceActions\",\"schemename\":\"dnet:provenanceActions\"},\"trust\":\"0.9\"},\"qualifier\":{\"classid\":\"handle\",\"classname\":\"Handle\",\"schemeid\":\"dnet:pid_types\",\"schemename\":\"dnet:pid_types\"},\"value\":\"1854/LU-8523529\"}"), + "{\"datainfo\":{\"deletedbyinference\":false,\"inferenceprovenance\":\"\",\"inferred\":false,\"invisible\":false,\"provenanceaction\":{\"classid\":\"sysimport:crosswalk:repository\",\"classname\":\"Harvested\",\"schemeid\":\"dnet:provenanceActions\",\"schemename\":\"dnet:provenanceActions\"},\"trust\":\"0.9\"},\"qualifier\":{\"classid\":\"grid\",\"classname\":\"GRID Identifier\",\"schemeid\":\"dnet:pid_types\",\"schemename\":\"dnet:pid_types\"},\"value\":\"grid_1\"}", + "{\"datainfo\":{\"deletedbyinference\":false,\"inferenceprovenance\":\"\",\"inferred\":false,\"invisible\":false,\"provenanceaction\":{\"classid\":\"sysimport:crosswalk:repository\",\"classname\":\"Harvested\",\"schemeid\":\"dnet:provenanceActions\",\"schemename\":\"dnet:provenanceActions\"},\"trust\":\"0.9\"},\"qualifier\":{\"classid\":\"ror\",\"classname\":\"Research Organization Registry\",\"schemeid\":\"dnet:pid_types\",\"schemename\":\"dnet:pid_types\"},\"value\":\"ror_2\"}", + "{\"datainfo\":{\"deletedbyinference\":false,\"inferenceprovenance\":\"\",\"inferred\":false,\"invisible\":false,\"provenanceaction\":{\"classid\":\"user:claim\",\"classname\":\"Linked by user\",\"schemeid\":\"dnet:provenanceActions\",\"schemename\":\"dnet:provenanceActions\"},\"trust\":\"0.9\"},\"qualifier\":{\"classid\":\"isni\",\"classname\":\"ISNI Identifier\",\"schemeid\":\"dnet:pid_types\",\"schemename\":\"dnet:pid_types\"},\"value\":\"isni_1\"}"), "authors"); double result = jsonListMatch.compare(a, b, conf); @@ -277,6 +294,13 @@ public class ComparatorTest extends AbstractPaceTest { result = jsonListMatch.compare(a, b, conf); assertEquals(1.0, result); + + params.put("mode", "type"); + jsonListMatch = new JsonListMatch(params); + result = jsonListMatch.compare(a, b, conf); + + assertEquals(0.5, result); + } @Test From 5015ba10eba66ba12750ff348cea559062f68e7e Mon Sep 17 00:00:00 2001 From: miconis Date: Mon, 14 Oct 2024 10:23:42 +0200 Subject: [PATCH 02/73] addition of date comparator --- 
dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/DateRange.java | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/DateRange.java diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/DateRange.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/DateRange.java new file mode 100644 index 000000000..91b8d1c41 --- /dev/null +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/DateRange.java @@ -0,0 +1,2 @@ +package eu.dnetlib.pace.tree;public class DateRange { +} From eab623ddfa0b01137df729981ef99abe0c129215 Mon Sep 17 00:00:00 2001 From: miconis Date: Mon, 14 Oct 2024 10:24:19 +0200 Subject: [PATCH 03/73] implementation of date matcher --- .../java/eu/dnetlib/pace/tree/DateRange.java | 67 ++++++++++++++++++- .../pace/comparators/ComparatorTest.java | 19 ++++++ 2 files changed, 85 insertions(+), 1 deletion(-) diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/DateRange.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/DateRange.java index 91b8d1c41..c913109a4 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/DateRange.java +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/DateRange.java @@ -1,2 +1,67 @@ -package eu.dnetlib.pace.tree;public class DateRange { +package eu.dnetlib.pace.tree; + +import com.wcohen.ss.AbstractStringDistance; +import eu.dnetlib.pace.config.Config; +import eu.dnetlib.pace.tree.support.AbstractStringComparator; +import eu.dnetlib.pace.tree.support.ComparatorClass; +import org.joda.time.DateTime; + +import java.time.DateTimeException; +import java.time.LocalDate; +import java.time.Period; +import java.time.format.DateTimeFormatter; +import java.util.Locale; +import java.util.Map; + +@ComparatorClass("dateRange") +public class DateRange extends AbstractStringComparator { + + int YEAR_RANGE; + + public DateRange(Map params) { + super(params, new com.wcohen.ss.JaroWinkler()); + YEAR_RANGE = Integer.parseInt(params.getOrDefault("year_range", "3")); + } + + public DateRange(final double weight) { + super(weight, new com.wcohen.ss.JaroWinkler()); + } + + protected DateRange(final double weight, final AbstractStringDistance ssalgo) { + super(weight, ssalgo); + } + + public static boolean isNumeric(String str) { + return str.matches("\\d+"); //match a number with optional '-' and decimal. + } + + @Override + public double distance(final String a, final String b, final Config conf) { + if (a.isEmpty() || b.isEmpty()) { + return -1.0; // return -1 if a field is missing + } + + try { + DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd", Locale.ENGLISH); + LocalDate d1 = LocalDate.parse(a, formatter); + LocalDate d2 = LocalDate.parse(b, formatter); + Period period = Period.between(d1, d2); + + return period.getYears() <= YEAR_RANGE? 
1.0 : 0.0; + } + catch (DateTimeException e) { + return -1.0; + } + + } + + @Override + public double getWeight() { + return super.weight; + } + + @Override + protected double normalize(final double d) { + return d; + } } diff --git a/dhp-pace-core/src/test/java/eu/dnetlib/pace/comparators/ComparatorTest.java b/dhp-pace-core/src/test/java/eu/dnetlib/pace/comparators/ComparatorTest.java index 635f1fa44..5e1484254 100644 --- a/dhp-pace-core/src/test/java/eu/dnetlib/pace/comparators/ComparatorTest.java +++ b/dhp-pace-core/src/test/java/eu/dnetlib/pace/comparators/ComparatorTest.java @@ -351,4 +351,23 @@ public class ComparatorTest extends AbstractPaceTest { } + @Test + public void dateMatch() { + + DateRange dateRange = new DateRange(params); + + double result = dateRange.distance("2021-05-13", "2023-05-13", conf); + assertEquals(1.0, result); + + result = dateRange.distance("2021-05-13", "2025-05-13", conf); + assertEquals(0.0, result); + + result = dateRange.distance("", "2020-05-05", conf); + assertEquals(-1.0, result); + + result = dateRange.distance("invalid date", "2021-05-02", conf); + assertEquals(-1.0, result); + + } + } From a877c76d70a10afdd888f84a33cbe683c2f78755 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 30 Oct 2024 11:24:25 +0100 Subject: [PATCH 04/73] make MergeUtils.selectOldestDate less prone to errors when receiving invalid date formats --- .../schema/oaf/utils/GraphCleaningFunctions.java | 2 +- .../dnetlib/dhp/schema/oaf/utils/MergeUtils.java | 15 +++++---------- 2 files changed, 6 insertions(+), 11 deletions(-) diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java index fdfd63a15..b6574da16 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java @@ -810,7 +810,7 @@ public class GraphCleaningFunctions extends CleaningFunctions { return author; } - private static Optional cleanDateField(Field dateofacceptance) { + public static Optional cleanDateField(Field dateofacceptance) { return Optional .ofNullable(dateofacceptance) .map(Field::getValue) diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/MergeUtils.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/MergeUtils.java index 79a12d630..4c411a155 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/MergeUtils.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/MergeUtils.java @@ -654,16 +654,9 @@ public class MergeUtils { } private static Field selectOldestDate(Field d1, Field d2) { - if (d1 == null || StringUtils.isBlank(d1.getValue())) { + if (!GraphCleaningFunctions.cleanDateField(d1).isPresent()) { return d2; - } else if (d2 == null || StringUtils.isBlank(d2.getValue())) { - return d1; - } - - if (StringUtils.contains(d1.getValue(), "null")) { - return d2; - } - if (StringUtils.contains(d2.getValue(), "null")) { + } else if (!GraphCleaningFunctions.cleanDateField(d2).isPresent()) { return d1; } @@ -715,7 +708,9 @@ public class MergeUtils { private static String spKeyExtractor(StructuredProperty sp) { return Optional .ofNullable(sp) - .map(s -> Joiner.on("||") + .map( + s -> Joiner + .on("||") .useForNull("") .join(qualifierKeyExtractor(s.getQualifier()), s.getValue())) .orElse(null); From a42c8b7c8541d3225d5d087469af9b7ff5fb9aa7 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 
30 Oct 2024 11:25:17 +0100 Subject: [PATCH 05/73] person table directory produced by the workflows raw_all and merge graphs --- .../dhp/oa/graph/merge/oozie_app/workflow.xml | 28 +++++++++++++++++++ .../oa/graph/raw_all/oozie_app/workflow.xml | 27 ++++++++++++++++++ 2 files changed, 55 insertions(+) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/merge/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/merge/oozie_app/workflow.xml index a8d0d5068..3444e3afe 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/merge/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/merge/oozie_app/workflow.xml @@ -68,6 +68,7 @@ + @@ -260,6 +261,33 @@ + + + yarn + cluster + Merge person + eu.dnetlib.dhp.oa.graph.merge.MergeGraphTableSparkJob + dhp-graph-mapper-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.shuffle.partitions=7680 + + --betaInputPath${betaInputGraphPath}/person + --prodInputPath${prodInputGraphPath}/person + --outputPath${graphOutputPath}/person + --graphTableClassNameeu.dnetlib.dhp.schema.oaf.Person + --priority${priority} + + + + + yarn diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_all/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_all/oozie_app/workflow.xml index ff927fe52..1b3cb1111 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_all/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_all/oozie_app/workflow.xml @@ -649,6 +649,7 @@ + @@ -860,6 +861,32 @@ + + + yarn + cluster + MergeClaims_person + eu.dnetlib.dhp.oa.graph.raw.MergeClaimsApplication + dhp-graph-mapper-${projectVersion}.jar + + --executor-memory ${sparkExecutorMemory} + --executor-cores ${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.shuffle.partitions=200 + + --rawGraphPath${workingDir}/graph_raw + --claimsGraphPath${workingDir}/graph_claims + --outputRawGaphPath${graphOutputPath} + --graphTableClassNameeu.dnetlib.dhp.schema.oaf.Person + + + + + From a8ed5a3b048b560a6c2b794834e27e4adcdcac97 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Mon, 4 Nov 2024 17:45:28 +0100 Subject: [PATCH 06/73] Organized getters and setters in the PMArticle class for better readability and maintainability. 
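Beyond readability, the plain JavaBean shape (void setters, no final collection fields) is what the new SparkCreatePubmedDump job introduced below relies on: it encodes PMArticle with Spark's bean encoder, which discovers fields through standard getter/setter pairs. A minimal sketch of the Java-side equivalent of the Scala Encoders.bean(classOf[PMArticle]) call (hypothetical caller, for illustration only):

    // Bean encoders introspect getter/setter pairs; fluent setters that
    // return 'this' (as setPmcId did before this change) may not be detected.
    Encoder<PMArticle> encoder = Encoders.bean(PMArticle.class);
    Dataset<PMArticle> articles = spark.createDataset(articleList, encoder);
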
--- dhp-shade-package/pom.xml | 122 +++---- .../personentity/ExtractPerson.java | 8 +- .../dnetlib/dhp/sx/bio/pubmed/PMArticle.java | 312 +++++------------- .../sx/bio/ebi/baseline_to_oaf_params.json | 7 +- .../sx/bio/ebi/SparkCreatePubmedDump.scala | 90 +++++ .../dnetlib/dhp/sx/bio/pubmed/PMParser2.scala | 264 +++++++++++++++ .../dhp/sx/graph/bio/single_pubmed.xml | 222 +++++++++++++ .../dnetlib/dhp/sx/bio/BioScholixTest.scala | 28 +- 8 files changed, 754 insertions(+), 299 deletions(-) create mode 100644 dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkCreatePubmedDump.scala create mode 100644 dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/pubmed/PMParser2.scala create mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/sx/graph/bio/single_pubmed.xml diff --git a/dhp-shade-package/pom.xml b/dhp-shade-package/pom.xml index d8e17ed46..c4f9b262e 100644 --- a/dhp-shade-package/pom.xml +++ b/dhp-shade-package/pom.xml @@ -26,16 +26,16 @@ - - eu.dnetlib.dhp - dhp-actionmanager - ${project.version} - - + + + eu.dnetlib.dhp + dhp-aggregation + ${project.version} + @@ -56,61 +56,61 @@ - - eu.dnetlib.dhp - dhp-graph-mapper - ${project.version} - - - eu.dnetlib.dhp - dhp-graph-provision - ${project.version} - - - eu.dnetlib.dhp - dhp-impact-indicators - ${project.version} - - - eu.dnetlib.dhp - dhp-stats-actionsets - ${project.version} - - - eu.dnetlib.dhp - dhp-stats-hist-snaps - ${project.version} - - - eu.dnetlib.dhp - dhp-stats-monitor-irish - ${project.version} - - - eu.dnetlib.dhp - dhp-stats-promote - ${project.version} - - - eu.dnetlib.dhp - dhp-stats-update - ${project.version} - - - eu.dnetlib.dhp - dhp-swh - ${project.version} - - - eu.dnetlib.dhp - dhp-usage-raw-data-update - ${project.version} - - - eu.dnetlib.dhp - dhp-usage-stats-build - ${project.version} - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/personentity/ExtractPerson.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/personentity/ExtractPerson.java index bf2c19c3d..db31bb43f 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/personentity/ExtractPerson.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/personentity/ExtractPerson.java @@ -15,6 +15,7 @@ import java.util.stream.Collectors; import org.apache.commons.cli.ParseException; import org.apache.commons.io.IOUtils; +import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; @@ -29,7 +30,6 @@ import org.apache.spark.sql.Dataset; import org.jetbrains.annotations.NotNull; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import org.spark_project.jetty.util.StringUtil; import com.fasterxml.jackson.databind.ObjectMapper; @@ -206,7 +206,7 @@ public class ExtractPerson implements Serializable { null); relation.setValidated(true); - if (StringUtil.isNotBlank(role)) { + if (StringUtils.isNotBlank(role)) { KeyValue kv = new KeyValue(); kv.setKey("role"); kv.setValue(role); @@ -439,13 +439,13 @@ public class ExtractPerson implements Serializable { null); relation.setValidated(true); - if (Optional.ofNullable(row.getStartDate()).isPresent() && StringUtil.isNotBlank(row.getStartDate())) { + if (Optional.ofNullable(row.getStartDate()).isPresent() 
&& StringUtils.isNotBlank(row.getStartDate())) { KeyValue kv = new KeyValue(); kv.setKey("startDate"); kv.setValue(row.getStartDate()); properties.add(kv); } - if (Optional.ofNullable(row.getEndDate()).isPresent() && StringUtil.isNotBlank(row.getEndDate())) { + if (Optional.ofNullable(row.getEndDate()).isPresent() && StringUtils.isNotBlank(row.getEndDate())) { KeyValue kv = new KeyValue(); kv.setKey("endDate"); kv.setValue(row.getEndDate()); diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMArticle.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMArticle.java index 3fb814606..6191f6446 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMArticle.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMArticle.java @@ -8,259 +8,115 @@ import java.util.List; /** * This class represent an instance of Pubmed Article extracted from the native XML * - * @author Sandro La Bruzzo */ - public class PMArticle implements Serializable { - /** - * the Pubmed Identifier - */ private String pmid; - private String pmcId; - - /** - * the DOI - */ private String doi; - /** - * the Pubmed Date extracted from Specifies a date significant to either the article's history or the citation's processing. - * All dates will have a , , and elements. Some may have an , , and element(s). - */ private String date; - /** - * This is an 'envelop' element that contains various elements describing the journal cited; i.e., ISSN, Volume, Issue, and PubDate and author name(s), however, it does not contain data itself. - */ private PMJournal journal; - /** - * The full journal title (taken from NLM cataloging data following NLM rules for how to compile a serial name) is exported in this element. Some characters that are not part of the NLM MEDLINE/PubMed Character Set reside in a relatively small number of full journal titles. The NLM journal title abbreviation is exported in the element. - */ private String title; - /** - * English-language abstracts are taken directly from the published article. - * If the article does not have a published abstract, the National Library of Medicine does not create one, - * thus the record lacks the and elements. However, in the absence of a formally - * labeled abstract in the published article, text from a substantive "summary", "summary and conclusions" or "conclusions and summary" may be used. - */ private String description; - /** - * the language in which an article was published is recorded in . - * All entries are three letter abbreviations stored in lower case, such as eng, fre, ger, jpn, etc. When a single - * record contains more than one language value the XML export program extracts the languages in alphabetic order by the 3-letter language value. - * Some records provided by collaborating data producers may contain the value und to identify articles whose language is undetermined. - */ private String language; - - /** - * NLM controlled vocabulary, Medical Subject Headings (MeSH®), is used to characterize the content of the articles represented by MEDLINE citations. 
* - */ - private final List subjects = new ArrayList<>(); - /** - * This element is used to identify the type of article indexed for MEDLINE; - * it characterizes the nature of the information or the manner in which it is conveyed as well as the type of - * research support received (e.g., Review, Letter, Retracted Publication, Clinical Conference, Research Support, N.I.H., Extramural). - */ - private final List publicationTypes = new ArrayList<>(); - /** - * Personal and collective (corporate) author names published with the article are found in . - */ + private List subjects; + private List publicationTypes = new ArrayList<>(); private List authors = new ArrayList<>(); + private List grants = new ArrayList<>(); - /** - * contains the research grant or contract number (or both) that designates financial support by any agency of the United States Public Health Service - * or any institute of the National Institutes of Health. Additionally, beginning in late 2005, grant numbers are included for many other US and non-US funding agencies and organizations. - */ - private final List grants = new ArrayList<>(); - - /** - * get the DOI - * @return a DOI - */ - public String getDoi() { - return doi; - } - - /** - * Set the DOI - * @param doi a DOI - */ - public void setDoi(String doi) { - this.doi = doi; - } - - /** - * get the Pubmed Identifier - * @return the PMID - */ public String getPmid() { return pmid; } - /** - * set the Pubmed Identifier - * @param pmid the Pubmed Identifier - */ public void setPmid(String pmid) { this.pmid = pmid; } - /** - * the Pubmed Date extracted from Specifies a date significant to either the article's history or the citation's processing. - * All dates will have a , , and elements. Some may have an , , and element(s). - * - * @return the Pubmed Date - */ - public String getDate() { - return date; - } - - /** - * Set the pubmed Date - * @param date - */ - public void setDate(String date) { - this.date = date; - } - - /** - * The full journal title (taken from NLM cataloging data following NLM rules for how to compile a serial name) is exported in this element. - * Some characters that are not part of the NLM MEDLINE/PubMed Character Set reside in a relatively small number of full journal titles. - * The NLM journal title abbreviation is exported in the element. - * - * @return the pubmed Journal Extracted - */ - public PMJournal getJournal() { - return journal; - } - - /** - * Set the mapped pubmed Journal - * @param journal - */ - public void setJournal(PMJournal journal) { - this.journal = journal; - } - - /** - * contains the entire title of the journal article. is always in English; - * those titles originally published in a non-English language and translated for are enclosed in square brackets. - * All titles end with a period unless another punctuation mark such as a question mark or bracket is present. - * Explanatory information about the title itself is enclosed in parentheses, e.g.: (author's transl). - * Corporate/collective authors may appear at the end of for citations up to about the year 2000. - * - * @return the extracted pubmed Title - */ - public String getTitle() { - return title; - } - - /** - * set the pubmed title - * @param title - */ - public void setTitle(String title) { - this.title = title; - } - - /** - * English-language abstracts are taken directly from the published article. - * If the article does not have a published abstract, the National Library of Medicine does not create one, - * thus the record lacks the and elements. 
However, in the absence of a formally - * labeled abstract in the published article, text from a substantive "summary", "summary and conclusions" or "conclusions and summary" may be used. - * - * @return the Mapped Pubmed Article Abstracts - */ - public String getDescription() { - return description; - } - - /** - * Set the Mapped Pubmed Article Abstracts - * @param description - */ - public void setDescription(String description) { - this.description = description; - } - - /** - * Personal and collective (corporate) author names published with the article are found in . - * - * @return get the Mapped Authors lists - */ - public List getAuthors() { - return authors; - } - - /** - * Set the Mapped Authors lists - * @param authors - */ - public void setAuthors(List authors) { - this.authors = authors; - } - - /** - * This element is used to identify the type of article indexed for MEDLINE; - * it characterizes the nature of the information or the manner in which it is conveyed as well as the type of - * research support received (e.g., Review, Letter, Retracted Publication, Clinical Conference, Research Support, N.I.H., Extramural). - * - * @return the mapped Subjects - */ - public List getSubjects() { - return subjects; - } - - /** - * - * the language in which an article was published is recorded in . - * All entries are three letter abbreviations stored in lower case, such as eng, fre, ger, jpn, etc. When a single - * record contains more than one language value the XML export program extracts the languages in alphabetic order by the 3-letter language value. - * Some records provided by collaborating data producers may contain the value und to identify articles whose language is undetermined. - * - * @return The mapped Language - */ - public String getLanguage() { - return language; - } - - /** - * - * Set The mapped Language - * - * @param language the mapped Language - */ - public void setLanguage(String language) { - this.language = language; - } - - /** - * This element is used to identify the type of article indexed for MEDLINE; - * it characterizes the nature of the information or the manner in which it is conveyed as well as the type of - * research support received (e.g., Review, Letter, Retracted Publication, Clinical Conference, Research Support, N.I.H., Extramural). - * - * @return the mapped Publication Type - */ - public List getPublicationTypes() { - return publicationTypes; - } - - /** - * contains the research grant or contract number (or both) that designates financial support by any agency of the United States Public Health Service - * or any institute of the National Institutes of Health. Additionally, beginning in late 2005, grant numbers are included for many other US and non-US funding agencies and organizations. 
- * @return the mapped grants - */ - - public List getGrants() { - return grants; - } - public String getPmcId() { return pmcId; } - public PMArticle setPmcId(String pmcId) { + public void setPmcId(String pmcId) { this.pmcId = pmcId; - return this; + } + + public String getDoi() { + return doi; + } + + public void setDoi(String doi) { + this.doi = doi; + } + + public String getDate() { + return date; + } + + public void setDate(String date) { + this.date = date; + } + + public PMJournal getJournal() { + return journal; + } + + public void setJournal(PMJournal journal) { + this.journal = journal; + } + + public String getTitle() { + return title; + } + + public void setTitle(String title) { + this.title = title; + } + + public String getDescription() { + return description; + } + + public void setDescription(String description) { + this.description = description; + } + + public String getLanguage() { + return language; + } + + public void setLanguage(String language) { + this.language = language; + } + + public List getSubjects() { + return subjects; + } + + public void setSubjects(List subjects) { + this.subjects = subjects; + } + + public List getPublicationTypes() { + return publicationTypes; + } + + public void setPublicationTypes(List publicationTypes) { + this.publicationTypes = publicationTypes; + } + + public List getAuthors() { + return authors; + } + + public void setAuthors(List authors) { + this.authors = authors; + } + + public List getGrants() { + return grants; + } + + public void setGrants(List grants) { + this.grants = grants; } } diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/ebi/baseline_to_oaf_params.json b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/ebi/baseline_to_oaf_params.json index 3ba83764d..8326fab0f 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/ebi/baseline_to_oaf_params.json +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/ebi/baseline_to_oaf_params.json @@ -1,8 +1,7 @@ [ {"paramName":"mt", "paramLongName":"master", "paramDescription": "should be local or yarn", "paramRequired": true}, {"paramName":"i", "paramLongName":"isLookupUrl", "paramDescription": "isLookupUrl", "paramRequired": true}, - {"paramName":"w", "paramLongName":"workingPath", "paramDescription": "the path of the sequencial file to read", "paramRequired": true}, - {"paramName":"mo", "paramLongName":"mdstoreOutputVersion", "paramDescription": "the oaf path ", "paramRequired": true}, - {"paramName":"s", "paramLongName":"skipUpdate", "paramDescription": "skip update ", "paramRequired": false}, - {"paramName":"h", "paramLongName":"hdfsServerUri", "paramDescription": "the working path ", "paramRequired": true} + {"paramName":"s", "paramLongName":"sourcePath", "paramDescription": "the baseline path", "paramRequired": true}, + {"paramName":"t", "paramLongName":"targetPath", "paramDescription": "the mdstore path to save", "paramRequired": true} + ] \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkCreatePubmedDump.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkCreatePubmedDump.scala new file mode 100644 index 000000000..c21bfd7c3 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkCreatePubmedDump.scala @@ -0,0 +1,90 @@ +package eu.dnetlib.dhp.sx.bio.ebi + +import com.fasterxml.jackson.databind.ObjectMapper +import 
eu.dnetlib.dhp.application.{AbstractScalaApplication, ArgumentApplicationParser} +import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup +import eu.dnetlib.dhp.schema.oaf.Oaf +import eu.dnetlib.dhp.sx.bio.pubmed.{PMArticle, PMAuthor, PMJournal, PMParser, PMParser2, PubMedToOaf} +import eu.dnetlib.dhp.utils.ISLookupClientFactory +import org.apache.spark.sql.functions._ +import org.apache.spark.sql.{Dataset, Encoder, Encoders, SparkSession} +import org.slf4j.{Logger, LoggerFactory} + +import java.io.ByteArrayInputStream +import javax.xml.stream.XMLInputFactory + +class SparkCreatePubmedDump(propertyPath: String, args: Array[String], log: Logger) + extends AbstractScalaApplication(propertyPath, args, log: Logger) { + + /** Here all the spark applications runs this method + * where the whole logic of the spark node is defined + */ + override def run(): Unit = { + val isLookupUrl: String = parser.get("isLookupUrl") + log.info("isLookupUrl: {}", isLookupUrl) + val sourcePath = parser.get("sourcePath") + log.info(s"SourcePath is '$sourcePath'") + val targetPath = parser.get("targetPath") + log.info(s"TargetPath is '$targetPath'") + + val isLookupService = ISLookupClientFactory.getLookUpService(isLookupUrl) + val vocabularies = VocabularyGroup.loadVocsFromIS(isLookupService) + + createPubmedDump(spark, sourcePath, targetPath, vocabularies) + + } + + def createPubmedDump( + spark: SparkSession, + sourcePath: String, + targetPath: String, + vocabularies: VocabularyGroup + ): Unit = { + require(spark != null) + + implicit val PMEncoder: Encoder[PMArticle] = Encoders.bean(classOf[PMArticle]) + + import spark.implicits._ + val df = spark.read.option("lineSep", "").text(sourcePath) + val mapper = new ObjectMapper() + df.as[String] + .map(s => { + val id = s.indexOf("") + if (id >= 0) s"${s.substring(id)}" else null + }) + .filter(s => s != null) + .map { i => + try { + new PMParser2().parse(i) + } catch { + case _: Exception => { + throw new RuntimeException(s"Error parsing article: $i") + } + } + } + .dropDuplicates("pmid") + .map { a => + val oaf = PubMedToOaf.convert(a, vocabularies) + if (oaf != null) + mapper.writeValueAsString(oaf) + else + null + } + .as[String] + .filter(s => s != null) + .write + .option("compression", "gzip") + .mode("overwrite") + .text(targetPath) + } +} + +object SparkCreatePubmedDump { + + def main(args: Array[String]): Unit = { + val log: Logger = LoggerFactory.getLogger(getClass) + + new SparkCreatePubmedDump("/eu/dnetlib/dhp/sx/bio/ebi/baseline_to_oaf_params.json", args, log).initialize().run() + + } +} diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/pubmed/PMParser2.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/pubmed/PMParser2.scala new file mode 100644 index 000000000..c9e868185 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/pubmed/PMParser2.scala @@ -0,0 +1,264 @@ +package eu.dnetlib.dhp.sx.bio.pubmed + +import org.apache.commons.lang3.StringUtils + +import javax.xml.stream.XMLEventReader +import scala.collection.JavaConverters._ +import scala.xml.{MetaData, NodeSeq} +import scala.xml.pull.{EvElemEnd, EvElemStart, EvText} + +class PMParser2 { + + /** Extracts the value of an attribute from a MetaData object. 
+ * @param attrs the MetaData object + * @param key the key of the attribute + * @return the value of the attribute or null if the attribute is not found + */ + private def extractAttributes(attrs: MetaData, key: String): String = { + + val res = attrs.get(key) + if (res.isDefined) { + val s = res.get + if (s != null && s.nonEmpty) + s.head.text + else + null + } else null + } + + /** Validates and formats a date given the year, month, and day as strings. + * + * @param year the year as a string + * @param month the month as a string + * @param day the day as a string + * @return the formatted date as "YYYY-MM-DD" or null if the date is invalid + */ + private def validate_Date(year: String, month: String, day: String): String = { + try { + f"${year.toInt}-${month.toInt}%02d-${day.toInt}%02d" + + } catch { + case _: Throwable => null + } + } + + /** Extracts the grant information from a NodeSeq object. + * + * @param gNode the NodeSeq object + * @return the grant information or an empty list if the grant information is not found + */ + private def extractGrant(gNode: NodeSeq): List[PMGrant] = { + gNode + .map(node => { + val grantId = (node \ "GrantID").text + val agency = (node \ "Agency").text + val country = (node \ "Country").text + new PMGrant(grantId, agency, country) + }) + .toList + } + + /** Extracts the journal information from a NodeSeq object. + * + * @param jNode the NodeSeq object + * @return the journal information or null if the journal information is not found + */ + private def extractJournal(jNode: NodeSeq): PMJournal = { + val journal = new PMJournal + journal.setTitle((jNode \ "Title").text) + journal.setIssn((jNode \ "ISSN").text) + journal.setVolume((jNode \ "JournalIssue" \ "Volume").text) + journal.setIssue((jNode \ "JournalIssue" \ "Issue").text) + if (journal.getTitle != null && StringUtils.isNotEmpty(journal.getTitle)) + journal + else + null + } + + private def extractAuthors(aNode: NodeSeq): List[PMAuthor] = { + aNode + .map(author => { + val a = new PMAuthor + a.setLastName((author \ "LastName").text) + a.setForeName((author \ "ForeName").text) + a + }) + .toList + } + + def parse(input: String): PMArticle = { + val xml = scala.xml.XML.loadString(input) + val article = new PMArticle + + val grantNodes = xml \ "MedlineCitation" \\ "Grant" + article.setGrants(extractGrant(grantNodes).asJava) + + val journal = xml \ "MedlineCitation" \ "Article" \ "Journal" + article.setJournal(extractJournal(journal)) + + val authors = xml \ "MedlineCitation" \ "Article" \ "AuthorList" \ "Author" + + article.setAuthors( + authors + .map(author => { + val a = new PMAuthor + a.setLastName((author \ "LastName").text) + a.setForeName((author \ "ForeName").text) + a + }) + .toList + .asJava + ) + + val pmId = xml \ "MedlineCitation" \ "PMID" + + val articleIds = xml \ "PubmedData" \ "ArticleIdList" \ "ArticleId" + articleIds.foreach(articleId => { + val idType = (articleId \ "@IdType").text + val id = articleId.text + if ("doi".equalsIgnoreCase(idType)) article.setDoi(id) + if ("pmc".equalsIgnoreCase(idType)) article.setPmcId(id) + }) + article.setPmid(pmId.text) + + val pubMedPubDate = xml \ "MedlineCitation" \ "DateCompleted" + val currentDate = + validate_Date((pubMedPubDate \ "Year").text, (pubMedPubDate \ "Month").text, (pubMedPubDate \ "Day").text) + if (currentDate != null) article.setDate(currentDate) + + val articleTitle = xml \ "MedlineCitation" \ "Article" \ "ArticleTitle" + article.setTitle(articleTitle.text) + + val abstractText = xml \ "MedlineCitation" \ "Article" \ 
"Abstract" \ "AbstractText" + if (abstractText != null && abstractText.text != null && abstractText.text.nonEmpty) + article.setDescription(abstractText.text.split("\n").map(s => s.trim).mkString(" ").trim) + + val language = xml \ "MedlineCitation" \ "Article" \ "Language" + article.setLanguage(language.text) + + val subjects = xml \ "MedlineCitation" \ "MeshHeadingList" \ "MeshHeading" + article.setSubjects( + subjects + .take(20) + .map(subject => { + val descriptorName = (subject \ "DescriptorName").text + val ui = (subject \ "DescriptorName" \ "@UI").text + val s = new PMSubject + s.setValue(descriptorName) + s.setMeshId(ui) + s + }) + .toList + .asJava + ) + val publicationTypes = xml \ "MedlineCitation" \ "Article" \ "PublicationTypeList" \ "PublicationType" + article.setPublicationTypes( + publicationTypes + .map(pt => { + val s = new PMSubject + s.setValue(pt.text) + s + }) + .toList + .asJava + ) + + article + } + + def parse2(xml: XMLEventReader): PMArticle = { + var currentArticle: PMArticle = null + var currentSubject: PMSubject = null + var currentAuthor: PMAuthor = null + var currentJournal: PMJournal = null + var currentGrant: PMGrant = null + var currNode: String = null + var currentYear = "0" + var currentMonth = "01" + var currentDay = "01" + var currentArticleType: String = null + + while (xml.hasNext) { + val ne = xml.next + ne match { + case EvElemStart(_, label, attrs, _) => + currNode = label + + label match { + case "PubmedArticle" => currentArticle = new PMArticle + case "Author" => currentAuthor = new PMAuthor + case "Journal" => currentJournal = new PMJournal + case "Grant" => currentGrant = new PMGrant + case "PublicationType" | "DescriptorName" => + currentSubject = new PMSubject + currentSubject.setMeshId(extractAttributes(attrs, "UI")) + case "ArticleId" => currentArticleType = extractAttributes(attrs, "IdType") + case _ => + } + case EvElemEnd(_, label) => + label match { + case "PubmedArticle" => return currentArticle + case "Author" => currentArticle.getAuthors.add(currentAuthor) + case "Journal" => currentArticle.setJournal(currentJournal) + case "Grant" => currentArticle.getGrants.add(currentGrant) + case "PubMedPubDate" => + if (currentArticle.getDate == null) + currentArticle.setDate(validate_Date(currentYear, currentMonth, currentDay)) + case "PubDate" => currentJournal.setDate(s"$currentYear-$currentMonth-$currentDay") + case "DescriptorName" => currentArticle.getSubjects.add(currentSubject) + case "PublicationType" => currentArticle.getPublicationTypes.add(currentSubject) + case _ => + } + case EvText(text) => + if (currNode != null && text.trim.nonEmpty) + currNode match { + case "ArticleTitle" => { + if (currentArticle.getTitle == null) + currentArticle.setTitle(text.trim) + else + currentArticle.setTitle(currentArticle.getTitle + text.trim) + } + case "AbstractText" => { + if (currentArticle.getDescription == null) + currentArticle.setDescription(text.trim) + else + currentArticle.setDescription(currentArticle.getDescription + text.trim) + } + case "PMID" => currentArticle.setPmid(text.trim) + case "ArticleId" => + if ("doi".equalsIgnoreCase(currentArticleType)) currentArticle.setDoi(text.trim) + if ("pmc".equalsIgnoreCase(currentArticleType)) currentArticle.setPmcId(text.trim) + case "Language" => currentArticle.setLanguage(text.trim) + case "ISSN" => currentJournal.setIssn(text.trim) + case "GrantID" => currentGrant.setGrantID(text.trim) + case "Agency" => currentGrant.setAgency(text.trim) + case "Country" => if (currentGrant != null) 
currentGrant.setCountry(text.trim) + case "Year" => currentYear = text.trim + case "Month" => currentMonth = text.trim + case "Day" => currentDay = text.trim + case "Volume" => currentJournal.setVolume(text.trim) + case "Issue" => currentJournal.setIssue(text.trim) + case "PublicationType" | "DescriptorName" => currentSubject.setValue(text.trim) + case "LastName" => { + if (currentAuthor != null) + currentAuthor.setLastName(text.trim) + } + case "ForeName" => + if (currentAuthor != null) + currentAuthor.setForeName(text.trim) + case "Title" => + if (currentJournal.getTitle == null) + currentJournal.setTitle(text.trim) + else + currentJournal.setTitle(currentJournal.getTitle + text.trim) + case _ => + + } + case _ => + } + + } + null + } + +} diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/sx/graph/bio/single_pubmed.xml b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/sx/graph/bio/single_pubmed.xml new file mode 100644 index 000000000..4b4d860d7 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/sx/graph/bio/single_pubmed.xml @@ -0,0 +1,222 @@ + + + 37885214 + + 2024 + 02 + 14 + + + 2024 + 02 + 14 + +
+ + 2752-7549 + + 40 + 5 + + 2023 Sep-Oct + + + Journal of pediatric hematology/oncology nursing + J Pediatr Hematol Oncol Nurs + + Care Needs of Parents of Children With Cancer in a Low-Middle-Income Country. + + 295-304 + + 10.1177/27527530231193972 + + Background: Mapping out actual supportive care needs assists nurses in providing holistic individualized care. This study aimed to explore the care needs of parents of children with cancer in the Philippines. Method: Guided by the Supportive Care Needs Framework (SCNF), this study used an embedded mixed-method design with the quantitative revised Cancer Patient Needs Questionnaire and qualitative semistructured interviews to describe parents' care needs and priorities. Results: Filipino parents (N = 156) of children with cancer have various care needs which could be classified along the SCNF categories-practical, informational, spiritual, physical, emotional, and physical needs as ranked from highest to lowest. A number of variables were significantly associated with care needs. Solid tumor diagnosis was associated with greater practical, emotional, and psychosocial care needs; having a child who had undergone surgery was associated with more practical and spiritual care needs; and being within one year of the child's diagnosis was associated with practical, psychosocial, and spiritual care needs. Parent priority needs included (a) addressing financial needs; (b) access to temporary housing to minimize treatment-related costs; (c) support groups among parents of children with cancer as a source of information; (d) financial and social support between members of family and partners of parents of children with cancer; and (e) using prayer to facilitate acceptance. Conclusions: Supportive care needs of parents of children with cancer are important components of care that should be given recognition to enhance holistic individualized care throughout the childhood cancer experience. + + + + Banayat + Aprille Campos + AC + 0000-0001-9339-9871 + + College of Nursing, University of the Philippines Manila, Manila, Philippines. + + + + Abad + Peter James B + PJB + + College of Nursing, University of the Philippines Manila, Manila, Philippines. + + + + Bonito + Sheila R + SR + + College of Nursing, University of the Philippines Manila, Manila, Philippines. + + + + Manahan + Lydia T + LT + + College of Nursing, University of the Philippines Manila, Manila, Philippines. + + + + Peralta + Arnold B + AB + + College of Nursing, University of the Philippines Manila, Manila, Philippines. + + + + eng + + Journal Article + + + 2023 + 10 + 26 + +
+ + United States + J Pediatr Hematol Oncol Nurs + 9918282681506676 + 2752-7530 + + IM + + + Child + + + Humans + + + Parents + psychology + + + Social Support + + + Spirituality + + + Religion + + + Neoplasms + therapy + + + + cancer + mixed methods + parent + pediatric + research + supportive care + + Declaration of Conflicting InterestsThe author(s) declared no potential conflicts of interest with respect to the research, authorship, and/or publication of this article. +
+ + + + 2024 + 2 + 12 + 18 + 42 + + + 2023 + 10 + 27 + 6 + 42 + + + 2023 + 10 + 27 + 3 + 43 + + + ppublish + + 37885214 + 10.1177/27527530231193972 + + +
+ +30522158 +32769323 +34061701 +34661197 +34837091 +35035475 +35211699 +35557982 +35782783 +35795240 +35832688 +35847411 +36081602 +36081858 +36468085 +36468934 +36580086 +36589526 +36619609 +36649460 +36654909 +36655054 +36700856 +36705625 +36713939 +36714172 +36741203 +36741905 +36743825 +36788221 +36844926 +36846546 +36935776 +36946757 +36972191 +37034422 +37124311 +37152108 +37171968 +37273889 +37333905 +37387733 +37431449 +37576947 +37601162 +37711214 +37901290 +37981909 +37981945 +37982005 +38037601 +38037602 +38150730 +38274640 +38332671 +38334184 +38335456 +38349506 +38349576 +38353676 + \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/sx/bio/BioScholixTest.scala b/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/sx/bio/BioScholixTest.scala index c4af14c40..1374b741d 100644 --- a/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/sx/bio/BioScholixTest.scala +++ b/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/sx/bio/BioScholixTest.scala @@ -5,7 +5,10 @@ import eu.dnetlib.dhp.aggregation.AbstractVocabularyTest import eu.dnetlib.dhp.schema.oaf.utils.PidType import eu.dnetlib.dhp.schema.oaf.{Oaf, Publication, Relation, Result} import eu.dnetlib.dhp.sx.bio.BioDBToOAF.ScholixResolved -import eu.dnetlib.dhp.sx.bio.pubmed.{PMArticle, PMParser, PMSubject, PubMedToOaf} +import eu.dnetlib.dhp.sx.bio.ebi.SparkCreatePubmedDump +import eu.dnetlib.dhp.sx.bio.pubmed.{PMArticle, PMAuthor, PMJournal, PMParser, PMParser2, PMSubject, PubMedToOaf} +import org.apache.commons.io.IOUtils +import org.apache.spark.sql.{Dataset, Encoder, Encoders, SparkSession} import org.json4s.DefaultFormats import org.json4s.JsonAST.{JField, JObject, JString} import org.json4s.jackson.JsonMethods.parse @@ -13,8 +16,9 @@ import org.junit.jupiter.api.Assertions._ import org.junit.jupiter.api.extension.ExtendWith import org.junit.jupiter.api.{BeforeEach, Test} import org.mockito.junit.jupiter.MockitoExtension +import org.slf4j.LoggerFactory -import java.io.{BufferedReader, InputStream, InputStreamReader} +import java.io.{BufferedReader, ByteArrayInputStream, InputStream, InputStreamReader} import java.util.zip.GZIPInputStream import javax.xml.stream.XMLInputFactory import scala.collection.JavaConverters._ @@ -48,6 +52,17 @@ class BioScholixTest extends AbstractVocabularyTest { } } + @Test + def testParsingPubmed2(): Unit = { + val mapper = new ObjectMapper() + val xml = IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/bio/single_pubmed.xml")) + val parser = new PMParser2() + val article = parser.parse(xml) + + println(mapper.writerWithDefaultPrettyPrinter().writeValueAsString(article)) + + } + @Test def testEBIData() = { val inputFactory = XMLInputFactory.newInstance @@ -124,6 +139,15 @@ class BioScholixTest extends AbstractVocabularyTest { } } + @Test + def testPubmedSplitting(): Unit = { + + val spark: SparkSession = SparkSession.builder().appName("test").master("local").getOrCreate() + new SparkCreatePubmedDump("", Array.empty, LoggerFactory.getLogger(getClass)) + .createPubmedDump(spark, "/home/sandro/Downloads/pubmed", "/home/sandro/Downloads/pubmed_mapped", vocabularies) + + } + @Test def testPubmedOriginalID(): Unit = { val article: PMArticle = new PMArticle From c1cef5d685373ad28dac341b56fc459cd43ff606 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Tue, 5 Nov 2024 10:38:40 +0100 Subject: [PATCH 07/73] removed old library joda time replaced with standard java.time introduced in java 8 --- 
 .../plugin/gtr2/Gtr2PublicationsIterator.java | 26 +++++++++----------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/gtr2/Gtr2PublicationsIterator.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/gtr2/Gtr2PublicationsIterator.java
index 5b8f48680..779c43712 100644
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/gtr2/Gtr2PublicationsIterator.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/gtr2/Gtr2PublicationsIterator.java
@@ -1,6 +1,8 @@
 
 package eu.dnetlib.dhp.collection.plugin.gtr2;
 
+import java.nio.charset.StandardCharsets;
+import java.time.LocalDate;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.Iterator;
@@ -16,9 +18,7 @@ import org.dom4j.Document;
 import org.dom4j.DocumentException;
 import org.dom4j.DocumentHelper;
 import org.dom4j.Element;
-import org.joda.time.DateTime;
-import org.joda.time.format.DateTimeFormat;
-import org.joda.time.format.DateTimeFormatter;
+import java.time.format.DateTimeFormatter;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -33,7 +33,7 @@ public class Gtr2PublicationsIterator implements Iterator {
 	private static final Logger log = LoggerFactory.getLogger(Gtr2PublicationsIterator.class);
 
 	private final HttpConnector2 connector;
-	private static final DateTimeFormatter simpleDateTimeFormatter = DateTimeFormat.forPattern("yyyy-MM-dd");
+	private static final DateTimeFormatter simpleDateTimeFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd");
 
 	private static final int MAX_ATTEMPTS = 10;
 
@@ -41,7 +41,7 @@ public class Gtr2PublicationsIterator implements Iterator {
 	private int currPage;
 	private int endPage;
 	private boolean incremental = false;
-	private DateTime fromDate;
+	private LocalDate fromDate;
 	private final Map cache = new HashMap<>();
 
@@ -188,28 +188,28 @@ public class Gtr2PublicationsIterator implements Iterator {
 	private Document loadURL(final String cleanUrl, final int attempt) {
 		try {
-			log.debug(" * Downloading Url: " + cleanUrl);
-			final byte[] bytes = this.connector.getInputSource(cleanUrl).getBytes("UTF-8");
+			log.debug(" * Downloading Url: {}", cleanUrl);
+			final byte[] bytes = this.connector.getInputSource(cleanUrl).getBytes(StandardCharsets.UTF_8);
 			return DocumentHelper.parseText(new String(bytes));
 		} catch (final Throwable e) {
-			log.error("Error dowloading url: " + cleanUrl + ", attempt = " + attempt, e);
+			log.error("Error downloading url: {}, attempt = {}", cleanUrl, attempt, e);
 			if (attempt >= MAX_ATTEMPTS) {
-				throw new RuntimeException("Error dowloading url: " + cleanUrl, e);
+				throw new RuntimeException("Error downloading url: " + cleanUrl, e);
 			}
 			try {
 				Thread.sleep(60000); // I wait for a minute
 			} catch (final InterruptedException e1) {
-				throw new RuntimeException("Error dowloading url: " + cleanUrl, e);
+				throw new RuntimeException("Error downloading url: " + cleanUrl, e);
 			}
 			return loadURL(cleanUrl, attempt + 1);
 		}
 	}
 
-	private DateTime parseDate(final String s) {
-		return DateTime.parse(s.contains("T") ? s.substring(0, s.indexOf("T")) : s, simpleDateTimeFormatter);
+	private LocalDate parseDate(final String s) {
+		return LocalDate.parse(s.contains("T") ?
s.substring(0, s.indexOf("T")) : s, simpleDateTimeFormatter); } - private boolean isAfter(final String d, final DateTime fromDate) { + private boolean isAfter(final String d, final LocalDate fromDate) { return StringUtils.isNotBlank(d) && parseDate(d).isAfter(fromDate); } } From 973aa7dca6508e2c1e82fa7c775b3d40c18fe45b Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 6 Nov 2024 12:29:06 +0100 Subject: [PATCH 08/73] [dedup] force the Relation schema when reading the merge rels --- .../java/eu/dnetlib/dhp/oa/dedup/SparkPropagateRelation.java | 1 + 1 file changed, 1 insertion(+) diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkPropagateRelation.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkPropagateRelation.java index c7efce4d7..b0bc314e2 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkPropagateRelation.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkPropagateRelation.java @@ -69,6 +69,7 @@ public class SparkPropagateRelation extends AbstractSparkAction { Dataset mergeRels = spark .read() + .schema(REL_BEAN_ENC.schema()) .load(DedupUtility.createMergeRelPath(workingPath, "*", "*")) .as(REL_BEAN_ENC); From f7bb53fe7895105f4a7a73b7de14cd6b4121589e Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 7 Nov 2024 01:04:43 +0100 Subject: [PATCH 09/73] [orcid enrichment] added missing workflow parameter: workingDir --- .../resources/eu/dnetlib/dhp/enrich/orcid/oozie_app/workflow.xml | 1 + 1 file changed, 1 insertion(+) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/enrich/orcid/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/enrich/orcid/oozie_app/workflow.xml index 4031da15a..1ece2c0be 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/enrich/orcid/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/enrich/orcid/oozie_app/workflow.xml @@ -51,6 +51,7 @@ --orcidPath${orcidPath} --targetPath${targetPath} --graphPath${graphPath} + --workingDir${workingDir} --masteryarn From 8f5171557e20ed58f69f7abe3af1ad0a85b10ba3 Mon Sep 17 00:00:00 2001 From: Giambattista Bloisi Date: Thu, 7 Nov 2024 12:22:34 +0100 Subject: [PATCH 10/73] Remove ORCID information when the same ORCID ID is used multiple times in the same result for different authors --- .../oaf/utils/GraphCleaningFunctions.java | 36 ++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java index b6574da16..9153a6476 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java @@ -2,7 +2,6 @@ package eu.dnetlib.dhp.schema.oaf.utils; import static eu.dnetlib.dhp.schema.common.ModelConstants.*; -import static eu.dnetlib.dhp.schema.common.ModelConstants.OPENAIRE_META_RESOURCE_TYPE; import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.getProvenance; import java.net.MalformedURLException; @@ -696,6 +695,7 @@ public class GraphCleaningFunctions extends CleaningFunctions { } } + // set ORCID_PENDING to all orcid values that are not coming from ORCID provenance for (Author a : r.getAuthor()) { if (Objects.isNull(a.getPid())) { 
a.setPid(Lists.newArrayList()); @@ -752,6 +752,40 @@ public class GraphCleaningFunctions extends CleaningFunctions { .collect(Collectors.toList())); } } + + // Identify clashing ORCIDS:that is same ORCID associated to multiple authors in this result + Map clashing_orcid = new HashMap<>(); + + for (Author a : r.getAuthor()) { + a + .getPid() + .stream() + .filter( + p -> StringUtils + .contains(StringUtils.lowerCase(p.getQualifier().getClassid()), ORCID_PENDING)) + .map(StructuredProperty::getValue) + .distinct() + .forEach(orcid -> clashing_orcid.compute(orcid, (k, v) -> (v == null) ? 1 : v + 1)); + } + + Set clashing = clashing_orcid + .entrySet() + .stream() + .filter(ee -> ee.getValue() > 1) + .map(Map.Entry::getKey) + .collect(Collectors.toSet()); + + // filter out clashing orcids + for (Author a : r.getAuthor()) { + a + .setPid( + a + .getPid() + .stream() + .filter(p -> !clashing.contains(p.getValue())) + .collect(Collectors.toList())); + } + } if (value instanceof Publication) { From 6fd9ec856608c3ca9baeedfa3677a64287483d92 Mon Sep 17 00:00:00 2001 From: Miriam Baglioni Date: Thu, 7 Nov 2024 13:55:31 +0100 Subject: [PATCH 11/73] [danishfunders] added link for danish funders versus the unidentified project for IRFD (501100004836) CF (501100002808) and NNF(501100009708) --- .../doiboost/crossref/Crossref2Oaf.scala | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala index f284a063e..bf11ed0a8 100644 --- a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala +++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala @@ -566,7 +566,23 @@ case object Crossref2Oaf { queue += generateRelation(sourceId, targetId, ModelConstants.IS_PRODUCED_BY) queue += generateRelation(targetId, sourceId, ModelConstants.PRODUCES) case _ => logger.debug("no match for " + funder.DOI.get) - + //Add for Danish funders + //Independent Research Fund Denmark (IRFD) + case "10.13039/501100004836" => + val targetId = getProjectId("irfd________", "1e5e62235d094afd01cd56e65112fc63") + queue += generateRelation(sourceId, targetId, ModelConstants.IS_PRODUCED_BY) + queue += generateRelation(targetId, sourceId, ModelConstants.PRODUCES) + //Carlsberg Foundation (CF) + case "10.13039/501100002808" => + val targetId = getProjectId("cf__________", "1e5e62235d094afd01cd56e65112fc63") + queue += generateRelation(sourceId, targetId, ModelConstants.IS_PRODUCED_BY) + queue += generateRelation(targetId, sourceId, ModelConstants.PRODUCES) + //Novo Nordisk Foundation (NNF) + case "10.13039/501100009708" => + val targetId = getProjectId("nnf_________", "1e5e62235d094afd01cd56e65112fc63") + queue += generateRelation(sourceId, targetId, ModelConstants.IS_PRODUCED_BY) + queue += generateRelation(targetId, sourceId, ModelConstants.PRODUCES) + case _ => logger.debug("no match for " + funder.DOI.get) } } else { From 0d0904f4ec2b1d4424d5d8d5d6d782049c0544cd Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Mon, 11 Nov 2024 10:27:23 +0100 Subject: [PATCH 12/73] updated workflow baseline to direct transform on OAF --- .../sx/bio/ebi/baseline_to_oaf_params.json | 8 ++--- .../dhp/sx/bio/pubmed/oozie_app/workflow.xml | 17 ++++------- .../sx/bio/ebi/SparkCreatePubmedDump.scala | 29 ++++++++++++------- 3 files changed, 27 insertions(+), 27 deletions(-) diff 
--git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/ebi/baseline_to_oaf_params.json b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/ebi/baseline_to_oaf_params.json index 8326fab0f..0fcc03266 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/ebi/baseline_to_oaf_params.json +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/ebi/baseline_to_oaf_params.json @@ -1,7 +1,7 @@ [ - {"paramName":"mt", "paramLongName":"master", "paramDescription": "should be local or yarn", "paramRequired": true}, - {"paramName":"i", "paramLongName":"isLookupUrl", "paramDescription": "isLookupUrl", "paramRequired": true}, - {"paramName":"s", "paramLongName":"sourcePath", "paramDescription": "the baseline path", "paramRequired": true}, - {"paramName":"t", "paramLongName":"targetPath", "paramDescription": "the mdstore path to save", "paramRequired": true} + {"paramName":"mt", "paramLongName":"master", "paramDescription": "should be local or yarn", "paramRequired": true}, + {"paramName":"i", "paramLongName":"isLookupUrl", "paramDescription": "isLookupUrl", "paramRequired": true}, + {"paramName":"s", "paramLongName":"sourcePath", "paramDescription": "the baseline path", "paramRequired": true}, + {"paramName":"mo", "paramLongName":"mdstoreOutputVersion", "paramDescription": "the mdstore path to save", "paramRequired": true} ] \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/pubmed/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/pubmed/oozie_app/workflow.xml index 30eb41469..0f4c5c249 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/pubmed/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/pubmed/oozie_app/workflow.xml @@ -16,11 +16,6 @@ mdStoreManagerURI the path of the cleaned mdstore - - skipUpdate - false - The request block size - @@ -44,16 +39,16 @@ --mdStoreManagerURI${mdStoreManagerURI} - + - + yarn cluster - Convert Baseline to OAF Dataset - eu.dnetlib.dhp.sx.bio.ebi.SparkCreateBaselineDataFrame + Convert Baseline Pubmed to OAF Dataset + eu.dnetlib.dhp.sx.bio.ebi.SparkCreatePubmedDump dhp-aggregation-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} @@ -65,12 +60,10 @@ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --workingPath${baselineWorkingPath} + --sourcePath${baselineWorkingPath} --mdstoreOutputVersion${wf:actionData('StartTransaction')['mdStoreVersion']} --masteryarn --isLookupUrl${isLookupUrl} - --hdfsServerUri${nameNode} - --skipUpdate${skipUpdate} diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkCreatePubmedDump.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkCreatePubmedDump.scala index c21bfd7c3..1bdd2a4bc 100644 --- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkCreatePubmedDump.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkCreatePubmedDump.scala @@ -1,18 +1,14 @@ package eu.dnetlib.dhp.sx.bio.ebi import com.fasterxml.jackson.databind.ObjectMapper -import eu.dnetlib.dhp.application.{AbstractScalaApplication, ArgumentApplicationParser} +import eu.dnetlib.dhp.application.AbstractScalaApplication import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup 
-import eu.dnetlib.dhp.schema.oaf.Oaf -import eu.dnetlib.dhp.sx.bio.pubmed.{PMArticle, PMAuthor, PMJournal, PMParser, PMParser2, PubMedToOaf} +import eu.dnetlib.dhp.schema.mdstore.MDStoreVersion +import eu.dnetlib.dhp.sx.bio.pubmed.{PMArticle, PMParser2, PubMedToOaf} import eu.dnetlib.dhp.utils.ISLookupClientFactory -import org.apache.spark.sql.functions._ -import org.apache.spark.sql.{Dataset, Encoder, Encoders, SparkSession} +import org.apache.spark.sql.{Encoder, Encoders, SparkSession} import org.slf4j.{Logger, LoggerFactory} -import java.io.ByteArrayInputStream -import javax.xml.stream.XMLInputFactory - class SparkCreatePubmedDump(propertyPath: String, args: Array[String], log: Logger) extends AbstractScalaApplication(propertyPath, args, log: Logger) { @@ -24,16 +20,26 @@ class SparkCreatePubmedDump(propertyPath: String, args: Array[String], log: Logg log.info("isLookupUrl: {}", isLookupUrl) val sourcePath = parser.get("sourcePath") log.info(s"SourcePath is '$sourcePath'") - val targetPath = parser.get("targetPath") - log.info(s"TargetPath is '$targetPath'") + val mdstoreOutputVersion = parser.get("mdstoreOutputVersion") + log.info(s"mdstoreOutputVersion is '$mdstoreOutputVersion'") + val mapper = new ObjectMapper() + val cleanedMdStoreVersion = mapper.readValue(mdstoreOutputVersion, classOf[MDStoreVersion]) + val outputBasePath = cleanedMdStoreVersion.getHdfsPath + log.info(s"outputBasePath is '$outputBasePath'") val isLookupService = ISLookupClientFactory.getLookUpService(isLookupUrl) val vocabularies = VocabularyGroup.loadVocsFromIS(isLookupService) - createPubmedDump(spark, sourcePath, targetPath, vocabularies) + createPubmedDump(spark, sourcePath, outputBasePath, vocabularies) } + /** This method creates a dump of the pubmed articles + * @param spark the spark session + * @param sourcePath the path of the source file + * @param targetPath the path of the target file + * @param vocabularies the vocabularies + */ def createPubmedDump( spark: SparkSession, sourcePath: String, @@ -54,6 +60,7 @@ class SparkCreatePubmedDump(propertyPath: String, args: Array[String], log: Logg }) .filter(s => s != null) .map { i => + //remove try catch try { new PMParser2().parse(i) } catch { From 19ce783e58f4f9176f4dc9a98d9bb250dc615e0d Mon Sep 17 00:00:00 2001 From: "sandro.labruzzo" Date: Mon, 11 Nov 2024 12:28:02 +0100 Subject: [PATCH 13/73] renamed workflow --- .../eu/dnetlib/dhp/sx/bio/pubmed/oozie_app/workflow.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/pubmed/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/pubmed/oozie_app/workflow.xml index 0f4c5c249..2a51b4050 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/pubmed/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/pubmed/oozie_app/workflow.xml @@ -1,4 +1,4 @@ - + baselineWorkingPath From 474f365286b7f83b8d58e4969277b3e67ebdd0cc Mon Sep 17 00:00:00 2001 From: "sandro.labruzzo" Date: Mon, 11 Nov 2024 12:37:27 +0100 Subject: [PATCH 14/73] removed wrong test --- .../test/scala/eu/dnetlib/dhp/sx/bio/BioScholixTest.scala | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/sx/bio/BioScholixTest.scala b/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/sx/bio/BioScholixTest.scala index 1374b741d..c942ca132 100644 --- 
a/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/sx/bio/BioScholixTest.scala +++ b/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/sx/bio/BioScholixTest.scala @@ -6,9 +6,9 @@ import eu.dnetlib.dhp.schema.oaf.utils.PidType import eu.dnetlib.dhp.schema.oaf.{Oaf, Publication, Relation, Result} import eu.dnetlib.dhp.sx.bio.BioDBToOAF.ScholixResolved import eu.dnetlib.dhp.sx.bio.ebi.SparkCreatePubmedDump -import eu.dnetlib.dhp.sx.bio.pubmed.{PMArticle, PMAuthor, PMJournal, PMParser, PMParser2, PMSubject, PubMedToOaf} +import eu.dnetlib.dhp.sx.bio.pubmed._ import org.apache.commons.io.IOUtils -import org.apache.spark.sql.{Dataset, Encoder, Encoders, SparkSession} +import org.apache.spark.sql.SparkSession import org.json4s.DefaultFormats import org.json4s.JsonAST.{JField, JObject, JString} import org.json4s.jackson.JsonMethods.parse @@ -18,13 +18,12 @@ import org.junit.jupiter.api.{BeforeEach, Test} import org.mockito.junit.jupiter.MockitoExtension import org.slf4j.LoggerFactory -import java.io.{BufferedReader, ByteArrayInputStream, InputStream, InputStreamReader} +import java.io.{BufferedReader, InputStream, InputStreamReader} import java.util.zip.GZIPInputStream import javax.xml.stream.XMLInputFactory import scala.collection.JavaConverters._ import scala.collection.mutable.ListBuffer import scala.io.Source -import scala.xml.pull.XMLEventReader @ExtendWith(Array(classOf[MockitoExtension])) class BioScholixTest extends AbstractVocabularyTest { @@ -139,7 +138,6 @@ class BioScholixTest extends AbstractVocabularyTest { } } - @Test def testPubmedSplitting(): Unit = { val spark: SparkSession = SparkSession.builder().appName("test").master("local").getOrCreate() From b0283fe94c168b87176f283f414ef2c4dfd3cdab Mon Sep 17 00:00:00 2001 From: Miriam Baglioni Date: Mon, 11 Nov 2024 14:57:57 +0100 Subject: [PATCH 15/73] [person] fix provenance of pid in person when it is orcid (classid entityregistry to avoid the cleaning put orcid_pending) --- .../dhp/actionmanager/personentity/ExtractPerson.java | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/personentity/ExtractPerson.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/personentity/ExtractPerson.java index bf2c19c3d..6830f2291 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/personentity/ExtractPerson.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/personentity/ExtractPerson.java @@ -345,7 +345,16 @@ public class ExtractPerson implements Serializable { OafMapperUtils .structuredProperty( op.getOrcid(), ModelConstants.ORCID, ModelConstants.ORCID_CLASSNAME, - ModelConstants.DNET_PID_TYPES, ModelConstants.DNET_PID_TYPES, null)); + ModelConstants.DNET_PID_TYPES, ModelConstants.DNET_PID_TYPES, + OafMapperUtils.dataInfo(false, + null, + false, + false, + OafMapperUtils.qualifier(ModelConstants.SYSIMPORT_CROSSWALK_ENTITYREGISTRY, + ModelConstants.SYSIMPORT_CROSSWALK_ENTITYREGISTRY, + ModelConstants.DNET_PID_TYPES, + ModelConstants.DNET_PID_TYPES), + "0.91"))); person.setDateofcollection(op.getLastModifiedDate()); person.setOriginalId(Arrays.asList(op.getOrcid())); person.setDataInfo(ORCIDDATAINFO); From f1ea9da5bcda277451416253982c24233e40d87b Mon Sep 17 00:00:00 2001 From: Miriam Baglioni Date: Mon, 11 Nov 2024 15:37:56 +0100 Subject: [PATCH 16/73] [person] checked type in inferenceprovenance --- 
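Note, outside the commit message: the change below only swaps the separator used when composing the inferenceprovenance label, so the source suffix is joined with a single ':' instead of '::'. A small hedged sketch of the effect; the constant's value here is an assumed stand-in, not the one actually defined in PrepareAffiliationRelations.

    class ProvenanceLabelSketch {
        // assumed placeholder value, for illustration only
        static final String BIP_INFERENCE_PROVENANCE = "bip:affiliation";

        public static void main(String[] args) {
            // before this patch: bip:affiliation::crossref
            System.out.println(BIP_INFERENCE_PROVENANCE + "::crossref");
            // after this patch:  bip:affiliation:crossref
            System.out.println(BIP_INFERENCE_PROVENANCE + ":crossref");
        }
    }
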
.../bipaffiliations/PrepareAffiliationRelations.java | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipaffiliations/PrepareAffiliationRelations.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipaffiliations/PrepareAffiliationRelations.java index 15c1cc376..75e58e665 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipaffiliations/PrepareAffiliationRelations.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipaffiliations/PrepareAffiliationRelations.java @@ -104,22 +104,22 @@ public class PrepareAffiliationRelations implements Serializable { .listKeyValues(OPENAIRE_DATASOURCE_ID, OPENAIRE_DATASOURCE_NAME); JavaPairRDD crossrefRelations = prepareAffiliationRelationsNewModel( - spark, crossrefInputPath, collectedfromOpenAIRE, BIP_INFERENCE_PROVENANCE + "::crossref"); + spark, crossrefInputPath, collectedfromOpenAIRE, BIP_INFERENCE_PROVENANCE + ":crossref"); JavaPairRDD pubmedRelations = prepareAffiliationRelations( - spark, pubmedInputPath, collectedfromOpenAIRE, BIP_INFERENCE_PROVENANCE + "::pubmed"); + spark, pubmedInputPath, collectedfromOpenAIRE, BIP_INFERENCE_PROVENANCE + ":pubmed"); JavaPairRDD openAPCRelations = prepareAffiliationRelationsNewModel( - spark, openapcInputPath, collectedfromOpenAIRE, BIP_INFERENCE_PROVENANCE + "::openapc"); + spark, openapcInputPath, collectedfromOpenAIRE, BIP_INFERENCE_PROVENANCE + ":openapc"); JavaPairRDD dataciteRelations = prepareAffiliationRelationsNewModel( - spark, dataciteInputPath, collectedfromOpenAIRE, BIP_INFERENCE_PROVENANCE + "::datacite"); + spark, dataciteInputPath, collectedfromOpenAIRE, BIP_INFERENCE_PROVENANCE + ":datacite"); JavaPairRDD webCrawlRelations = prepareAffiliationRelationsNewModel( - spark, webcrawlInputPath, collectedfromOpenAIRE, BIP_INFERENCE_PROVENANCE + "::rawaff"); + spark, webcrawlInputPath, collectedfromOpenAIRE, BIP_INFERENCE_PROVENANCE + ":rawaff"); JavaPairRDD publisherRelations = prepareAffiliationRelationFromPublisherNewModel( - spark, publisherlInputPath, collectedfromOpenAIRE, BIP_INFERENCE_PROVENANCE + "::webcrawl"); + spark, publisherlInputPath, collectedfromOpenAIRE, BIP_INFERENCE_PROVENANCE + ":webcrawl"); crossrefRelations .union(pubmedRelations) From 250f101779a16ffbec1c16d3e0dc1050d6533c87 Mon Sep 17 00:00:00 2001 From: Miriam Baglioni Date: Mon, 11 Nov 2024 16:04:06 +0100 Subject: [PATCH 17/73] [person] fixed issue in creating project identifier for the graph for person->project relations --- .../dhp/actionmanager/personentity/ExtractPerson.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/personentity/ExtractPerson.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/personentity/ExtractPerson.java index 6830f2291..6976def4c 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/personentity/ExtractPerson.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/personentity/ExtractPerson.java @@ -15,6 +15,7 @@ import java.util.stream.Collectors; import org.apache.commons.cli.ParseException; import org.apache.commons.io.IOUtils; +import org.apache.commons.lang.StringUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; @@ -193,8 
+194,8 @@ public class ExtractPerson implements Serializable { private static Relation getProjectRelation(String project, String orcid, String role) { String source = PERSON_PREFIX + "::" + IdentifierFactory.md5(orcid); - String target = PROJECT_ID_PREFIX + project.substring(0, 14) - + IdentifierFactory.md5(project.substring(15)); + String target = PROJECT_ID_PREFIX + StringUtils.substringBefore(project, "::") + "::" + + IdentifierFactory.md5(StringUtils.substringAfter(project, "::")); List properties = new ArrayList<>(); Relation relation = OafMapperUtils From 6c5df761e21d5aea6c203cb2ca2374b33a9219e5 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Tue, 12 Nov 2024 14:18:04 +0100 Subject: [PATCH 18/73] enforce resulttype based on the dnet:result_typologies vocabulary and upon merge --- .../dhp/oa/merge/GroupEntitiesSparkJob.java | 7 ++- .../dhp/schema/oaf/utils/MergeUtils.java | 62 ++++++++++++++++--- .../dhp/oa/dedup/DedupRecordFactory.java | 2 +- .../dhp/oa/dedup/DatasetMergerTest.java | 4 +- .../raw/AbstractMdRecordToOafMapper.java | 11 ++-- .../raw/GenerateEntitiesApplication.java | 2 +- .../dhp/sx/graph/SparkCreateInputGraph.scala | 2 +- 7 files changed, 66 insertions(+), 24 deletions(-) diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/GroupEntitiesSparkJob.java b/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/GroupEntitiesSparkJob.java index 24de1a787..98ec09277 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/GroupEntitiesSparkJob.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/GroupEntitiesSparkJob.java @@ -2,8 +2,7 @@ package eu.dnetlib.dhp.oa.merge; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; -import static org.apache.spark.sql.functions.col; -import static org.apache.spark.sql.functions.when; +import static org.apache.spark.sql.functions.*; import java.util.Map; import java.util.Optional; @@ -135,7 +134,9 @@ public class GroupEntitiesSparkJob { .applyCoarVocabularies(entity, vocs), OAFENTITY_KRYO_ENC) .groupByKey((MapFunction) OafEntity::getId, Encoders.STRING()) - .mapGroups((MapGroupsFunction) MergeUtils::mergeById, OAFENTITY_KRYO_ENC) + .mapGroups( + (MapGroupsFunction) (key, group) -> MergeUtils.mergeById(group, vocs), + OAFENTITY_KRYO_ENC) .map( (MapFunction>) t -> new Tuple2<>( t.getClass().getName(), t), diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/MergeUtils.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/MergeUtils.java index 4c411a155..d7e08fca7 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/MergeUtils.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/MergeUtils.java @@ -23,24 +23,30 @@ import org.apache.commons.lang3.tuple.Pair; import com.github.sisyphsu.dateparser.DateParserUtils; import com.google.common.base.Joiner; +import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; import eu.dnetlib.dhp.oa.merge.AuthorMerger; import eu.dnetlib.dhp.schema.common.AccessRightComparator; +import eu.dnetlib.dhp.schema.common.EntityType; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.oaf.*; public class MergeUtils { - public static T mergeById(String s, Iterator oafEntityIterator) { - return mergeGroup(s, oafEntityIterator, true); + public static T mergeById(Iterator oafEntityIterator, VocabularyGroup vocs) { + return mergeGroup(oafEntityIterator, true, vocs); } - public static T mergeGroup(String s, Iterator oafEntityIterator) { - return 
mergeGroup(s, oafEntityIterator, false); + public static T mergeGroup(Iterator oafEntityIterator) { + return mergeGroup(oafEntityIterator, false); } - public static T mergeGroup(String s, Iterator oafEntityIterator, - boolean checkDelegateAuthority) { + public static T mergeGroup(Iterator oafEntityIterator, boolean checkDelegateAuthority) { + return mergeGroup(oafEntityIterator, checkDelegateAuthority, null); + } + + public static T mergeGroup(Iterator oafEntityIterator, + boolean checkDelegateAuthority, VocabularyGroup vocs) { ArrayList sortedEntities = new ArrayList<>(); oafEntityIterator.forEachRemaining(sortedEntities::add); @@ -49,13 +55,49 @@ public class MergeUtils { Iterator it = sortedEntities.iterator(); T merged = it.next(); - while (it.hasNext()) { - merged = checkedMerge(merged, it.next(), checkDelegateAuthority); + if (!it.hasNext() && merged instanceof Result && vocs != null) { + return enforceResultType(vocs, (Result) merged); + } else { + while (it.hasNext()) { + merged = checkedMerge(merged, it.next(), checkDelegateAuthority); + } } - return merged; } + private static T enforceResultType(VocabularyGroup vocs, Result mergedResult) { + if (Optional.ofNullable(mergedResult.getInstance()).map(List::isEmpty).orElse(true)) { + return (T) mergedResult; + } else { + final Instance i = mergedResult.getInstance().get(0); + + if (!vocs.vocabularyExists(ModelConstants.DNET_RESULT_TYPOLOGIES)) { + return (T) mergedResult; + } else { + final Qualifier expectedResultType = vocs + .getSynonymAsQualifier( + ModelConstants.DNET_RESULT_TYPOLOGIES, + i.getInstancetype().getClassid()); + + // there is a clash among the result types + if (!expectedResultType.getClassid().equals(mergedResult.getResulttype().getClassid())) { + try { + String resulttype = expectedResultType.getClassid(); + if (EntityType.otherresearchproduct.toString().equals(resulttype)) { + resulttype = "other"; + } + Result result = (Result) ModelSupport.oafTypes.get(resulttype).newInstance(); + return (T) mergeResultFields(result, mergedResult); + } catch (InstantiationException | IllegalAccessException e) { + throw new IllegalStateException(e); + } + } else { + return (T) mergedResult; + } + } + } + } + public static T checkedMerge(final T left, final T right, boolean checkDelegateAuthority) { return (T) merge(left, right, checkDelegateAuthority); } @@ -106,7 +148,7 @@ public class MergeUtils { return mergeSoftware((Software) left, (Software) right); } - return mergeResultFields((Result) left, (Result) right); + return left; } else if (sameClass(left, right, Datasource.class)) { // TODO final int trust = compareTrust(left, right); diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java index 44482cfdb..f6a436543 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java @@ -135,7 +135,7 @@ public class DedupRecordFactory { return Collections.emptyIterator(); } - OafEntity mergedEntity = MergeUtils.mergeGroup(dedupId, cliques.iterator()); + OafEntity mergedEntity = MergeUtils.mergeGroup(cliques.iterator()); // dedup records do not have date of transformation attribute mergedEntity.setDateoftransformation(null); mergedEntity diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/DatasetMergerTest.java 
b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/DatasetMergerTest.java index 726814c43..a79047590 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/DatasetMergerTest.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/DatasetMergerTest.java @@ -46,8 +46,8 @@ class DatasetMergerTest implements Serializable { } @Test - void datasetMergerTest() throws InstantiationException, IllegalAccessException, InvocationTargetException { - Dataset pub_merged = MergeUtils.mergeGroup(dedupId, datasets.stream().map(Tuple2::_2).iterator()); + void datasetMergerTest() { + Dataset pub_merged = MergeUtils.mergeGroup(datasets.stream().map(Tuple2::_2).iterator()); // verify id assertEquals(dedupId, pub_merged.getId()); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java index 2436a272c..ba6887a2e 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java @@ -155,7 +155,7 @@ public abstract class AbstractMdRecordToOafMapper { final List instances = prepareInstances(doc, entityInfo, collectedFrom, hostedBy); - final String type = getResultType(doc, instances); + final String type = getResultType(instances); return createOafs(doc, type, instances, collectedFrom, entityInfo, lastUpdateTimestamp); } catch (final DocumentException e) { @@ -164,10 +164,9 @@ public abstract class AbstractMdRecordToOafMapper { } } - protected String getResultType(final Document doc, final List instances) { - final String type = doc.valueOf("//dr:CobjCategory/@type"); + protected String getResultType(final List instances) { - if (StringUtils.isBlank(type) && this.vocs.vocabularyExists(ModelConstants.DNET_RESULT_TYPOLOGIES)) { + if (this.vocs.vocabularyExists(ModelConstants.DNET_RESULT_TYPOLOGIES)) { final String instanceType = instances .stream() .map(i -> i.getInstancetype().getClassid()) @@ -178,9 +177,9 @@ public abstract class AbstractMdRecordToOafMapper { .ofNullable(this.vocs.getSynonymAsQualifier(ModelConstants.DNET_RESULT_TYPOLOGIES, instanceType)) .map(Qualifier::getClassid) .orElse("0000"); + } else { + throw new IllegalStateException("Missing vocabulary: " + ModelConstants.DNET_RESULT_TYPOLOGIES); } - - return type; } private KeyValue getProvenanceDatasource(final Document doc, final String xpathId, final String xpathName) { diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java index c3806c211..357fae470 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java @@ -133,7 +133,7 @@ public class GenerateEntitiesApplication extends AbstractMigrationApplication { inputRdd .keyBy(oaf -> ModelSupport.idFn().apply(oaf)) .groupByKey() - .map(t -> MergeUtils.mergeGroup(t._1, t._2.iterator())), + .map(t -> MergeUtils.mergeGroup(t._2.iterator())), // .mapToPair(oaf -> new Tuple2<>(ModelSupport.idFn().apply(oaf), oaf)) // .reduceByKey(MergeUtils::merge) // .map(Tuple2::_2), 
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkCreateInputGraph.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkCreateInputGraph.scala index d94a23947..42299cd34 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkCreateInputGraph.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkCreateInputGraph.scala @@ -133,7 +133,7 @@ object SparkCreateInputGraph { val ds: Dataset[T] = spark.read.load(sourcePath).as[T] ds.groupByKey(_.getId) - .mapGroups { (id, it) => MergeUtils.mergeGroup(id, it.asJava).asInstanceOf[T] } + .mapGroups { (id, it) => MergeUtils.mergeGroup(it.asJava).asInstanceOf[T] } // .reduceGroups { (x: T, y: T) => MergeUtils.merge(x, y).asInstanceOf[T] } // .map(_) .write From 07f267bb10911d62e30a2b299db3c50fcd1746a2 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 13 Nov 2024 08:14:26 +0100 Subject: [PATCH 19/73] fix vocabulary lookup in mergeutils --- .../dhp/schema/oaf/utils/MergeUtils.java | 425 +++++++++--------- 1 file changed, 215 insertions(+), 210 deletions(-) diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/MergeUtils.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/MergeUtils.java index d7e08fca7..dc76860f8 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/MergeUtils.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/MergeUtils.java @@ -16,6 +16,8 @@ import java.util.function.Function; import java.util.stream.Collectors; import java.util.stream.Stream; +import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; +import eu.dnetlib.dhp.schema.common.EntityType; import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.tuple.ImmutablePair; import org.apache.commons.lang3.tuple.Pair; @@ -23,10 +25,8 @@ import org.apache.commons.lang3.tuple.Pair; import com.github.sisyphsu.dateparser.DateParserUtils; import com.google.common.base.Joiner; -import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; import eu.dnetlib.dhp.oa.merge.AuthorMerger; import eu.dnetlib.dhp.schema.common.AccessRightComparator; -import eu.dnetlib.dhp.schema.common.EntityType; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.oaf.*; @@ -46,7 +46,7 @@ public class MergeUtils { } public static T mergeGroup(Iterator oafEntityIterator, - boolean checkDelegateAuthority, VocabularyGroup vocs) { + boolean checkDelegateAuthority, VocabularyGroup vocs) { ArrayList sortedEntities = new ArrayList<>(); oafEntityIterator.forEachRemaining(sortedEntities::add); @@ -74,11 +74,16 @@ public class MergeUtils { if (!vocs.vocabularyExists(ModelConstants.DNET_RESULT_TYPOLOGIES)) { return (T) mergedResult; } else { - final Qualifier expectedResultType = vocs - .getSynonymAsQualifier( + final Qualifier expectedResultType = vocs.lookupTermBySynonym( ModelConstants.DNET_RESULT_TYPOLOGIES, i.getInstancetype().getClassid()); + if (Objects.isNull(expectedResultType)) { + throw new IllegalArgumentException( + "instance type not bound to any result type in dnet:result_typologies: " + + i.getInstancetype().getClassid()); + } + // there is a clash among the result types if (!expectedResultType.getClassid().equals(mergedResult.getResulttype().getClassid())) { try { @@ -117,10 +122,10 @@ public class MergeUtils { return mergeRelation((Relation) left, (Relation) right); } else { throw new RuntimeException( - String - 
.format( - "MERGE_FROM_AND_GET incompatible types: %s, %s", - left.getClass().getCanonicalName(), right.getClass().getCanonicalName())); + String + .format( + "MERGE_FROM_AND_GET incompatible types: %s, %s", + left.getClass().getCanonicalName(), right.getClass().getCanonicalName())); } } @@ -159,10 +164,10 @@ public class MergeUtils { return mergeProject((Project) left, (Project) right); } else { throw new RuntimeException( - String - .format( - "MERGE_FROM_AND_GET incompatible types: %s, %s", - left.getClass().getCanonicalName(), right.getClass().getCanonicalName())); + String + .format( + "MERGE_FROM_AND_GET incompatible types: %s, %s", + left.getClass().getCanonicalName(), right.getClass().getCanonicalName())); } } @@ -253,7 +258,7 @@ public class MergeUtils { } private static List mergeLists(final List left, final List right, int trust, - Function keyExtractor, BinaryOperator merger) { + Function keyExtractor, BinaryOperator merger) { if (left == null || left.isEmpty()) { return right != null ? right : new ArrayList<>(); } else if (right == null || right.isEmpty()) { @@ -264,11 +269,11 @@ public class MergeUtils { List l = trust >= 0 ? right : left; return new ArrayList<>(Stream - .concat(h.stream(), l.stream()) - .filter(Objects::nonNull) - .distinct() - .collect(Collectors.toMap(keyExtractor, v -> v, merger, LinkedHashMap::new)) - .values()); + .concat(h.stream(), l.stream()) + .filter(Objects::nonNull) + .distinct() + .collect(Collectors.toMap(keyExtractor, v -> v, merger, LinkedHashMap::new)) + .values()); } private static List unionDistinctLists(final List left, final List right, int trust) { @@ -282,10 +287,10 @@ public class MergeUtils { List l = trust >= 0 ? right : left; return Stream - .concat(h.stream(), l.stream()) - .filter(Objects::nonNull) - .distinct() - .collect(Collectors.toList()); + .concat(h.stream(), l.stream()) + .filter(Objects::nonNull) + .distinct() + .collect(Collectors.toList()); } private static List unionDistinctListOfString(final List l, final List r) { @@ -296,10 +301,10 @@ public class MergeUtils { } return Stream - .concat(l.stream(), r.stream()) - .filter(StringUtils::isNotBlank) - .distinct() - .collect(Collectors.toList()); + .concat(l.stream(), r.stream()) + .filter(StringUtils::isNotBlank) + .distinct() + .collect(Collectors.toList()); } // TODO review @@ -325,7 +330,7 @@ public class MergeUtils { } private static List unionTitle(List left, List right, - int trust) { + int trust) { if (left == null) { return right; } else if (right == null) { @@ -336,10 +341,10 @@ public class MergeUtils { List l = trust >= 0 ? 
right : left; return Stream - .concat(h.stream(), l.stream()) - .filter(Objects::isNull) - .distinct() - .collect(Collectors.toList()); + .concat(h.stream(), l.stream()) + .filter(Objects::isNull) + .distinct() + .collect(Collectors.toList()); } /** @@ -374,8 +379,8 @@ public class MergeUtils { merged.setPid(mergeLists(merged.getPid(), enrich.getPid(), trust, MergeUtils::spKeyExtractor, (p1, p2) -> p1)); merged.setDateofcollection(LocalDateTime.now().toString()); merged - .setDateoftransformation( - chooseString(merged.getDateoftransformation(), enrich.getDateoftransformation(), trust)); + .setDateoftransformation( + chooseString(merged.getDateoftransformation(), enrich.getDateoftransformation(), trust)); merged.setExtraInfo(unionDistinctLists(merged.getExtraInfo(), enrich.getExtraInfo(), trust)); // When merging records OAI provenance becomes null merged.setOaiprovenance(null); @@ -392,7 +397,7 @@ public class MergeUtils { checkArgument(Objects.equals(merge.getTarget(), enrich.getTarget()), "target ids must be equal"); checkArgument(Objects.equals(merge.getRelType(), enrich.getRelType()), "relType(s) must be equal"); checkArgument( - Objects.equals(merge.getSubRelType(), enrich.getSubRelType()), "subRelType(s) must be equal"); + Objects.equals(merge.getSubRelType(), enrich.getSubRelType()), "subRelType(s) must be equal"); checkArgument(Objects.equals(merge.getRelClass(), enrich.getRelClass()), "relClass(es) must be equal"); // merge.setProvenance(mergeLists(merge.getProvenance(), enrich.getProvenance())); @@ -403,10 +408,10 @@ public class MergeUtils { merge.setValidationDate(ModelSupport.oldest(merge.getValidationDate(), enrich.getValidationDate())); } catch (ParseException e) { throw new IllegalArgumentException(String - .format( - "invalid validation date format in relation [s:%s, t:%s]: %s", merge.getSource(), - merge.getTarget(), - merge.getValidationDate())); + .format( + "invalid validation date format in relation [s:%s, t:%s]: %s", merge.getSource(), + merge.getTarget(), + merge.getValidationDate())); } // TODO keyvalue merge @@ -420,7 +425,7 @@ public class MergeUtils { T merge = mergeOafEntityFields(original, enrich, trust); if (merge.getProcessingchargeamount() == null - || StringUtils.isBlank(merge.getProcessingchargeamount().getValue())) { + || StringUtils.isBlank(merge.getProcessingchargeamount().getValue())) { merge.setProcessingchargeamount(enrich.getProcessingchargeamount()); merge.setProcessingchargecurrency(enrich.getProcessingchargecurrency()); } @@ -452,8 +457,8 @@ public class MergeUtils { } merge - .setDateofacceptance( - mergeDateOfAcceptance(merge.getDateofacceptance(), enrich.getDateofacceptance(), trust)); + .setDateofacceptance( + mergeDateOfAcceptance(merge.getDateofacceptance(), enrich.getDateofacceptance(), trust)); merge.setPublisher(coalesce(merge.getPublisher(), enrich.getPublisher())); merge.setEmbargoenddate(coalesce(merge.getEmbargoenddate(), enrich.getEmbargoenddate())); @@ -468,7 +473,7 @@ public class MergeUtils { merge.setCoverage(unionDistinctLists(merge.getCoverage(), enrich.getCoverage(), trust)); if (enrich.getBestaccessright() != null - && new AccessRightComparator<>() + && new AccessRightComparator<>() .compare(enrich.getBestaccessright(), merge.getBestaccessright()) < 0) { merge.setBestaccessright(enrich.getBestaccessright()); } @@ -481,8 +486,8 @@ public class MergeUtils { // ok merge - .setExternalReference( - mergeExternalReference(merge.getExternalReference(), enrich.getExternalReference(), trust)); + .setExternalReference( + 
mergeExternalReference(merge.getExternalReference(), enrich.getExternalReference(), trust)); // instance enrichment or union // review instance equals => add pid to comparision @@ -490,17 +495,17 @@ public class MergeUtils { merge.setInstance(mergeInstances(merge.getInstance(), enrich.getInstance(), trust)); } else { final List enrichmentInstances = isAnEnrichment(merge) ? merge.getInstance() - : enrich.getInstance(); + : enrich.getInstance(); final List enrichedInstances = isAnEnrichment(merge) ? enrich.getInstance() - : merge.getInstance(); + : merge.getInstance(); if (isAnEnrichment(merge)) merge.setDataInfo(enrich.getDataInfo()); merge.setInstance(enrichInstances(enrichedInstances, enrichmentInstances)); } merge - .setEoscifguidelines( - mergeEosciifguidelines(merge.getEoscifguidelines(), enrich.getEoscifguidelines(), trust)); + .setEoscifguidelines( + mergeEosciifguidelines(merge.getEoscifguidelines(), enrich.getEoscifguidelines(), trust)); merge.setIsGreen(booleanOR(merge.getIsGreen(), enrich.getIsGreen())); // OK but should be list of values merge.setOpenAccessColor(coalesce(merge.getOpenAccessColor(), enrich.getOpenAccessColor())); @@ -526,7 +531,7 @@ public class MergeUtils { LocalDate enrich_date = LocalDate.parse(enrich.getValue(), DateTimeFormatter.ISO_DATE); if (enrich_date.getYear() > 1300 - && (merge_date.getYear() < 1300 || merge_date.isAfter(enrich_date))) { + && (merge_date.getYear() < 1300 || merge_date.isAfter(enrich_date))) { return enrich; } } catch (NullPointerException | DateTimeParseException e) { @@ -544,56 +549,56 @@ public class MergeUtils { private static List mergeInstances(List v1, List v2, int trust) { return mergeLists( - v1, v2, trust, - MergeUtils::instanceKeyExtractor, - MergeUtils::instanceMerger); + v1, v2, trust, + MergeUtils::instanceKeyExtractor, + MergeUtils::instanceMerger); } private static List mergeEosciifguidelines(List v1, List v2, - int trust) { + int trust) { return mergeLists( - v1, v2, trust, er -> Joiner - .on("||") - .useForNull("") - .join(er.getCode(), er.getLabel(), er.getUrl(), er.getSemanticRelation()), - (r, l) -> r); + v1, v2, trust, er -> Joiner + .on("||") + .useForNull("") + .join(er.getCode(), er.getLabel(), er.getUrl(), er.getSemanticRelation()), + (r, l) -> r); } private static List mergeExternalReference(List v1, - List v2, int trust) { + List v2, int trust) { return mergeLists( - v1, v2, trust, er -> Joiner - .on(',') - .useForNull("") - .join( - er.getSitename(), er.getLabel(), - er.getUrl(), toString(er.getQualifier()), er.getRefidentifier(), - er.getQuery(), toString(er.getDataInfo())), - (r, l) -> r); + v1, v2, trust, er -> Joiner + .on(',') + .useForNull("") + .join( + er.getSitename(), er.getLabel(), + er.getUrl(), toString(er.getQualifier()), er.getRefidentifier(), + er.getQuery(), toString(er.getDataInfo())), + (r, l) -> r); } private static String toString(DataInfo di) { return Joiner - .on(',') - .useForNull("") - .join( - di.getInvisible(), di.getInferred(), di.getDeletedbyinference(), di.getTrust(), - di.getInferenceprovenance(), toString(di.getProvenanceaction())); + .on(',') + .useForNull("") + .join( + di.getInvisible(), di.getInferred(), di.getDeletedbyinference(), di.getTrust(), + di.getInferenceprovenance(), toString(di.getProvenanceaction())); } private static String toString(Qualifier q) { return Joiner - .on(',') - .useForNull("") - .join(q.getClassid(), q.getClassname(), q.getSchemeid(), q.getSchemename()); + .on(',') + .useForNull("") + .join(q.getClassid(), q.getClassname(), q.getSchemeid(), 
q.getSchemename()); } private static String toString(StructuredProperty sp) { return Joiner - .on(',') - .useForNull("") - .join(toString(sp.getQualifier()), sp.getValue()); + .on(',') + .useForNull("") + .join(toString(sp.getQualifier()), sp.getValue()); } private static List mergeStructuredProperties(List v1, List v2, int trust) { @@ -632,17 +637,17 @@ public class MergeUtils { // 2. @@ // 3. || return String - .join( - "::", - kvKeyExtractor(i.getHostedby()), - kvKeyExtractor(i.getCollectedfrom()), - qualifierKeyExtractor(i.getAccessright()), - qualifierKeyExtractor(i.getInstancetype()), - Optional.ofNullable(i.getUrl()).map(u -> String.join("@@", u)).orElse(null), - Optional - .ofNullable(i.getPid()) - .map(pp -> pp.stream().map(MergeUtils::spKeyExtractor).collect(Collectors.joining("@@"))) - .orElse(null)); + .join( + "::", + kvKeyExtractor(i.getHostedby()), + kvKeyExtractor(i.getCollectedfrom()), + qualifierKeyExtractor(i.getAccessright()), + qualifierKeyExtractor(i.getInstancetype()), + Optional.ofNullable(i.getUrl()).map(u -> String.join("@@", u)).orElse(null), + Optional + .ofNullable(i.getPid()) + .map(pp -> pp.stream().map(MergeUtils::spKeyExtractor).collect(Collectors.joining("@@"))) + .orElse(null)); } private static Instance instanceMerger(Instance i1, Instance i2) { @@ -653,30 +658,30 @@ public class MergeUtils { i.setInstancetype(i1.getInstancetype()); i.setPid(mergeLists(i1.getPid(), i2.getPid(), 0, MergeUtils::spKeyExtractor, (sp1, sp2) -> sp1)); i - .setAlternateIdentifier( - mergeLists( - i1.getAlternateIdentifier(), i2.getAlternateIdentifier(), 0, MergeUtils::spKeyExtractor, - (sp1, sp2) -> sp1)); + .setAlternateIdentifier( + mergeLists( + i1.getAlternateIdentifier(), i2.getAlternateIdentifier(), 0, MergeUtils::spKeyExtractor, + (sp1, sp2) -> sp1)); i - .setRefereed( - Collections - .min( - Stream.of(i1.getRefereed(), i2.getRefereed()).collect(Collectors.toList()), - new RefereedComparator())); + .setRefereed( + Collections + .min( + Stream.of(i1.getRefereed(), i2.getRefereed()).collect(Collectors.toList()), + new RefereedComparator())); i - .setInstanceTypeMapping( - mergeLists( - i1.getInstanceTypeMapping(), i2.getInstanceTypeMapping(), 0, - MergeUtils::instanceTypeMappingKeyExtractor, (itm1, itm2) -> itm1)); + .setInstanceTypeMapping( + mergeLists( + i1.getInstanceTypeMapping(), i2.getInstanceTypeMapping(), 0, + MergeUtils::instanceTypeMappingKeyExtractor, (itm1, itm2) -> itm1)); i.setFulltext(selectFulltext(i1.getFulltext(), i2.getFulltext())); i.setDateofacceptance(selectOldestDate(i1.getDateofacceptance(), i2.getDateofacceptance())); i.setLicense(coalesce(i1.getLicense(), i2.getLicense())); i.setProcessingchargeamount(coalesce(i1.getProcessingchargeamount(), i2.getProcessingchargeamount())); i.setProcessingchargecurrency(coalesce(i1.getProcessingchargecurrency(), i2.getProcessingchargecurrency())); i - .setMeasures( - mergeLists(i1.getMeasures(), i2.getMeasures(), 0, MergeUtils::measureKeyExtractor, (m1, m2) -> m1)); + .setMeasures( + mergeLists(i1.getMeasures(), i2.getMeasures(), 0, MergeUtils::measureKeyExtractor, (m1, m2) -> m1)); i.setUrl(unionDistinctListOfString(i1.getUrl(), i2.getUrl())); @@ -685,14 +690,14 @@ public class MergeUtils { private static String measureKeyExtractor(Measure m) { return String - .join( - "::", - m.getId(), - m - .getUnit() - .stream() - .map(KeyValue::getKey) - .collect(Collectors.joining("::"))); + .join( + "::", + m.getId(), + m + .getUnit() + .stream() + .map(KeyValue::getKey) + .collect(Collectors.joining("::"))); } private 
static Field selectOldestDate(Field d1, Field d2) { @@ -703,16 +708,16 @@ public class MergeUtils { } return Stream - .of(d1, d2) - .min( - Comparator - .comparing( - f -> DateParserUtils - .parseDate(f.getValue()) - .toInstant() - .atZone(ZoneId.systemDefault()) - .toLocalDate())) - .orElse(d1); + .of(d1, d2) + .min( + Comparator + .comparing( + f -> DateParserUtils + .parseDate(f.getValue()) + .toInstant() + .atZone(ZoneId.systemDefault()) + .toLocalDate())) + .orElse(d1); } private static String selectFulltext(String ft1, String ft2) { @@ -727,12 +732,12 @@ public class MergeUtils { private static String instanceTypeMappingKeyExtractor(InstanceTypeMapping itm) { return String - .join( - "::", - itm.getOriginalType(), - itm.getTypeCode(), - itm.getTypeLabel(), - itm.getVocabularyName()); + .join( + "::", + itm.getOriginalType(), + itm.getTypeCode(), + itm.getTypeLabel(), + itm.getVocabularyName()); } private static String kvKeyExtractor(KeyValue kv) { @@ -749,13 +754,13 @@ public class MergeUtils { private static String spKeyExtractor(StructuredProperty sp) { return Optional - .ofNullable(sp) - .map( - s -> Joiner - .on("||") - .useForNull("") - .join(qualifierKeyExtractor(s.getQualifier()), s.getValue())) - .orElse(null); + .ofNullable(sp) + .map( + s -> Joiner + .on("||") + .useForNull("") + .join(qualifierKeyExtractor(s.getQualifier()), s.getValue())) + .orElse(null); } private static T mergeORP(T original, T enrich) { @@ -777,8 +782,8 @@ public class MergeUtils { merge.setLicense(unionDistinctLists(merge.getLicense(), enrich.getLicense(), trust)); merge.setCodeRepositoryUrl(chooseReference(merge.getCodeRepositoryUrl(), enrich.getCodeRepositoryUrl(), trust)); merge - .setProgrammingLanguage( - chooseReference(merge.getProgrammingLanguage(), enrich.getProgrammingLanguage(), trust)); + .setProgrammingLanguage( + chooseReference(merge.getProgrammingLanguage(), enrich.getProgrammingLanguage(), trust)); return merge; } @@ -792,11 +797,11 @@ public class MergeUtils { merge.setSize(chooseReference(merge.getSize(), enrich.getSize(), trust)); merge.setVersion(chooseReference(merge.getVersion(), enrich.getVersion(), trust)); merge - .setLastmetadataupdate( - chooseReference(merge.getLastmetadataupdate(), enrich.getLastmetadataupdate(), trust)); + .setLastmetadataupdate( + chooseReference(merge.getLastmetadataupdate(), enrich.getLastmetadataupdate(), trust)); merge - .setMetadataversionnumber( - chooseReference(merge.getMetadataversionnumber(), enrich.getMetadataversionnumber(), trust)); + .setMetadataversionnumber( + chooseReference(merge.getMetadataversionnumber(), enrich.getMetadataversionnumber(), trust)); merge.setGeolocation(unionDistinctLists(merge.getGeolocation(), enrich.getGeolocation(), trust)); return merge; @@ -818,26 +823,26 @@ public class MergeUtils { merged.setLegalshortname(chooseReference(merged.getLegalshortname(), enrich.getLegalshortname(), trust)); merged.setLegalname(chooseReference(merged.getLegalname(), enrich.getLegalname(), trust)); merged - .setAlternativeNames(unionDistinctLists(enrich.getAlternativeNames(), merged.getAlternativeNames(), trust)); + .setAlternativeNames(unionDistinctLists(enrich.getAlternativeNames(), merged.getAlternativeNames(), trust)); merged.setWebsiteurl(chooseReference(merged.getWebsiteurl(), enrich.getWebsiteurl(), trust)); merged.setLogourl(chooseReference(merged.getLogourl(), enrich.getLogourl(), trust)); merged.setEclegalbody(chooseReference(merged.getEclegalbody(), enrich.getEclegalbody(), trust)); 
merged.setEclegalperson(chooseReference(merged.getEclegalperson(), enrich.getEclegalperson(), trust)); merged.setEcnonprofit(chooseReference(merged.getEcnonprofit(), enrich.getEcnonprofit(), trust)); merged - .setEcresearchorganization( - chooseReference(merged.getEcresearchorganization(), enrich.getEcresearchorganization(), trust)); + .setEcresearchorganization( + chooseReference(merged.getEcresearchorganization(), enrich.getEcresearchorganization(), trust)); merged - .setEchighereducation(chooseReference(merged.getEchighereducation(), enrich.getEchighereducation(), trust)); + .setEchighereducation(chooseReference(merged.getEchighereducation(), enrich.getEchighereducation(), trust)); merged - .setEcinternationalorganizationeurinterests( - chooseReference( - merged.getEcinternationalorganizationeurinterests(), - enrich.getEcinternationalorganizationeurinterests(), trust)); + .setEcinternationalorganizationeurinterests( + chooseReference( + merged.getEcinternationalorganizationeurinterests(), + enrich.getEcinternationalorganizationeurinterests(), trust)); merged - .setEcinternationalorganization( - chooseReference( - merged.getEcinternationalorganization(), enrich.getEcinternationalorganization(), trust)); + .setEcinternationalorganization( + chooseReference( + merged.getEcinternationalorganization(), enrich.getEcinternationalorganization(), trust)); merged.setEcenterprise(chooseReference(merged.getEcenterprise(), enrich.getEcenterprise(), trust)); merged.setEcsmevalidated(chooseReference(merged.getEcsmevalidated(), enrich.getEcsmevalidated(), trust)); merged.setEcnutscode(chooseReference(merged.getEcnutscode(), enrich.getEcnutscode(), trust)); @@ -861,8 +866,8 @@ public class MergeUtils { merged.setDuration(chooseReference(merged.getDuration(), enrich.getDuration(), trust)); merged.setEcsc39(chooseReference(merged.getEcsc39(), enrich.getEcsc39(), trust)); merged - .setOamandatepublications( - chooseReference(merged.getOamandatepublications(), enrich.getOamandatepublications(), trust)); + .setOamandatepublications( + chooseReference(merged.getOamandatepublications(), enrich.getOamandatepublications(), trust)); merged.setEcarticle29_3(chooseReference(merged.getEcarticle29_3(), enrich.getEcarticle29_3(), trust)); merged.setSubjects(unionDistinctLists(merged.getSubjects(), enrich.getSubjects(), trust)); merged.setFundingtree(unionDistinctLists(merged.getFundingtree(), enrich.getFundingtree(), trust)); @@ -888,8 +893,8 @@ public class MergeUtils { } merged - .setH2020classification( - unionDistinctLists(merged.getH2020classification(), enrich.getH2020classification(), trust)); + .setH2020classification( + unionDistinctLists(merged.getH2020classification(), enrich.getH2020classification(), trust)); return merged; } @@ -916,7 +921,7 @@ public class MergeUtils { * @return list of instances possibly enriched */ private static List enrichInstances(final List toEnrichInstances, - final List enrichmentInstances) { + final List enrichmentInstances) { final List enrichmentResult = new ArrayList<>(); if (toEnrichInstances == null) { @@ -954,42 +959,42 @@ public class MergeUtils { */ private static Map toInstanceMap(final List ri) { return ri - .stream() - .filter(i -> i.getPid() != null || i.getAlternateIdentifier() != null) - .flatMap(i -> { - final List> result = new ArrayList<>(); - if (i.getPid() != null) - i - .getPid() - .stream() - .filter(MergeUtils::validPid) - .forEach(p -> result.add(new ImmutablePair<>(extractKeyFromPid(p), i))); - if (i.getAlternateIdentifier() != null) - i - 
.getAlternateIdentifier() - .stream() - .filter(MergeUtils::validPid) - .forEach(p -> result.add(new ImmutablePair<>(extractKeyFromPid(p), i))); - return result.stream(); - }) - .collect( - Collectors - .toMap( - Pair::getLeft, - Pair::getRight, - (a, b) -> a)); + .stream() + .filter(i -> i.getPid() != null || i.getAlternateIdentifier() != null) + .flatMap(i -> { + final List> result = new ArrayList<>(); + if (i.getPid() != null) + i + .getPid() + .stream() + .filter(MergeUtils::validPid) + .forEach(p -> result.add(new ImmutablePair<>(extractKeyFromPid(p), i))); + if (i.getAlternateIdentifier() != null) + i + .getAlternateIdentifier() + .stream() + .filter(MergeUtils::validPid) + .forEach(p -> result.add(new ImmutablePair<>(extractKeyFromPid(p), i))); + return result.stream(); + }) + .collect( + Collectors + .toMap( + Pair::getLeft, + Pair::getRight, + (a, b) -> a)); } private static boolean isFromDelegatedAuthority(Result r) { return Optional - .ofNullable(r.getInstance()) - .map( - instance -> instance - .stream() - .filter(i -> Objects.nonNull(i.getCollectedfrom())) - .map(i -> i.getCollectedfrom().getKey()) - .anyMatch(cfId -> IdentifierFactory.delegatedAuthorityDatasourceIds().contains(cfId))) - .orElse(false); + .ofNullable(r.getInstance()) + .map( + instance -> instance + .stream() + .filter(i -> Objects.nonNull(i.getCollectedfrom())) + .map(i -> i.getCollectedfrom().getKey()) + .anyMatch(cfId -> IdentifierFactory.delegatedAuthorityDatasourceIds().contains(cfId))) + .orElse(false); } /** @@ -1025,15 +1030,15 @@ public class MergeUtils { * @return the list */ private static List findEnrichmentsByPID(final List pids, - final Map enrichments) { + final Map enrichments) { if (pids == null || enrichments == null) return null; return pids - .stream() - .map(MergeUtils::extractKeyFromPid) - .map(enrichments::get) - .filter(Objects::nonNull) - .collect(Collectors.toList()); + .stream() + .map(MergeUtils::extractKeyFromPid) + .map(enrichments::get) + .filter(Objects::nonNull) + .collect(Collectors.toList()); } /** @@ -1044,8 +1049,8 @@ public class MergeUtils { */ private static boolean isAnEnrichment(OafEntity e) { return e.getDataInfo() != null && - e.getDataInfo().getProvenanceaction() != null - && ModelConstants.PROVENANCE_ENRICH.equalsIgnoreCase(e.getDataInfo().getProvenanceaction().getClassid()); + e.getDataInfo().getProvenanceaction() != null + && ModelConstants.PROVENANCE_ENRICH.equalsIgnoreCase(e.getDataInfo().getProvenanceaction().getClassid()); } /** @@ -1068,17 +1073,17 @@ public class MergeUtils { merge.setHostedby(firstNonNull(merge.getHostedby(), enrichment.getHostedby())); merge.setUrl(unionDistinctLists(merge.getUrl(), enrichment.getUrl(), 0)); merge - .setDistributionlocation( - firstNonNull(merge.getDistributionlocation(), enrichment.getDistributionlocation())); + .setDistributionlocation( + firstNonNull(merge.getDistributionlocation(), enrichment.getDistributionlocation())); merge.setCollectedfrom(firstNonNull(merge.getCollectedfrom(), enrichment.getCollectedfrom())); // pid and alternateId are used for matching merge.setDateofacceptance(firstNonNull(merge.getDateofacceptance(), enrichment.getDateofacceptance())); merge - .setProcessingchargeamount( - firstNonNull(merge.getProcessingchargeamount(), enrichment.getProcessingchargeamount())); + .setProcessingchargeamount( + firstNonNull(merge.getProcessingchargeamount(), enrichment.getProcessingchargeamount())); merge - .setProcessingchargecurrency( - firstNonNull(merge.getProcessingchargecurrency(), 
enrichment.getProcessingchargecurrency())); + .setProcessingchargecurrency( + firstNonNull(merge.getProcessingchargecurrency(), enrichment.getProcessingchargecurrency())); merge.setRefereed(firstNonNull(merge.getRefereed(), enrichment.getRefereed())); merge.setMeasures(unionDistinctLists(merge.getMeasures(), enrichment.getMeasures(), 0)); merge.setFulltext(firstNonNull(merge.getFulltext(), enrichment.getFulltext())); @@ -1086,14 +1091,14 @@ public class MergeUtils { private static int compareTrust(Oaf a, Oaf b) { String left = Optional - .ofNullable(a.getDataInfo()) - .map(DataInfo::getTrust) - .orElse("0.0"); + .ofNullable(a.getDataInfo()) + .map(DataInfo::getTrust) + .orElse("0.0"); String right = Optional - .ofNullable(b.getDataInfo()) - .map(DataInfo::getTrust) - .orElse("0.0"); + .ofNullable(b.getDataInfo()) + .map(DataInfo::getTrust) + .orElse("0.0"); return left.compareTo(right); } From a1d5ad5c2609c91b60b97600163072d551dcc440 Mon Sep 17 00:00:00 2001 From: "sandro.labruzzo" Date: Wed, 13 Nov 2024 09:51:13 +0100 Subject: [PATCH 20/73] code formatted --- .../dhp/schema/oaf/utils/MergeUtils.java | 423 +++++++++--------- .../personentity/ExtractPerson.java | 22 +- .../plugin/gtr2/Gtr2PublicationsIterator.java | 6 +- 3 files changed, 228 insertions(+), 223 deletions(-) diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/MergeUtils.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/MergeUtils.java index dc76860f8..cd8506583 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/MergeUtils.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/MergeUtils.java @@ -16,8 +16,6 @@ import java.util.function.Function; import java.util.stream.Collectors; import java.util.stream.Stream; -import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; -import eu.dnetlib.dhp.schema.common.EntityType; import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.tuple.ImmutablePair; import org.apache.commons.lang3.tuple.Pair; @@ -25,8 +23,10 @@ import org.apache.commons.lang3.tuple.Pair; import com.github.sisyphsu.dateparser.DateParserUtils; import com.google.common.base.Joiner; +import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; import eu.dnetlib.dhp.oa.merge.AuthorMerger; import eu.dnetlib.dhp.schema.common.AccessRightComparator; +import eu.dnetlib.dhp.schema.common.EntityType; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.oaf.*; @@ -46,7 +46,7 @@ public class MergeUtils { } public static T mergeGroup(Iterator oafEntityIterator, - boolean checkDelegateAuthority, VocabularyGroup vocs) { + boolean checkDelegateAuthority, VocabularyGroup vocs) { ArrayList sortedEntities = new ArrayList<>(); oafEntityIterator.forEachRemaining(sortedEntities::add); @@ -74,14 +74,15 @@ public class MergeUtils { if (!vocs.vocabularyExists(ModelConstants.DNET_RESULT_TYPOLOGIES)) { return (T) mergedResult; } else { - final Qualifier expectedResultType = vocs.lookupTermBySynonym( + final Qualifier expectedResultType = vocs + .lookupTermBySynonym( ModelConstants.DNET_RESULT_TYPOLOGIES, i.getInstancetype().getClassid()); if (Objects.isNull(expectedResultType)) { throw new IllegalArgumentException( - "instance type not bound to any result type in dnet:result_typologies: " + - i.getInstancetype().getClassid()); + "instance type not bound to any result type in dnet:result_typologies: " + + i.getInstancetype().getClassid()); } // there is a clash among the result types @@ 
-122,10 +123,10 @@ public class MergeUtils { return mergeRelation((Relation) left, (Relation) right); } else { throw new RuntimeException( - String - .format( - "MERGE_FROM_AND_GET incompatible types: %s, %s", - left.getClass().getCanonicalName(), right.getClass().getCanonicalName())); + String + .format( + "MERGE_FROM_AND_GET incompatible types: %s, %s", + left.getClass().getCanonicalName(), right.getClass().getCanonicalName())); } } @@ -164,10 +165,10 @@ public class MergeUtils { return mergeProject((Project) left, (Project) right); } else { throw new RuntimeException( - String - .format( - "MERGE_FROM_AND_GET incompatible types: %s, %s", - left.getClass().getCanonicalName(), right.getClass().getCanonicalName())); + String + .format( + "MERGE_FROM_AND_GET incompatible types: %s, %s", + left.getClass().getCanonicalName(), right.getClass().getCanonicalName())); } } @@ -258,7 +259,7 @@ public class MergeUtils { } private static List mergeLists(final List left, final List right, int trust, - Function keyExtractor, BinaryOperator merger) { + Function keyExtractor, BinaryOperator merger) { if (left == null || left.isEmpty()) { return right != null ? right : new ArrayList<>(); } else if (right == null || right.isEmpty()) { @@ -269,11 +270,11 @@ public class MergeUtils { List l = trust >= 0 ? right : left; return new ArrayList<>(Stream - .concat(h.stream(), l.stream()) - .filter(Objects::nonNull) - .distinct() - .collect(Collectors.toMap(keyExtractor, v -> v, merger, LinkedHashMap::new)) - .values()); + .concat(h.stream(), l.stream()) + .filter(Objects::nonNull) + .distinct() + .collect(Collectors.toMap(keyExtractor, v -> v, merger, LinkedHashMap::new)) + .values()); } private static List unionDistinctLists(final List left, final List right, int trust) { @@ -287,10 +288,10 @@ public class MergeUtils { List l = trust >= 0 ? right : left; return Stream - .concat(h.stream(), l.stream()) - .filter(Objects::nonNull) - .distinct() - .collect(Collectors.toList()); + .concat(h.stream(), l.stream()) + .filter(Objects::nonNull) + .distinct() + .collect(Collectors.toList()); } private static List unionDistinctListOfString(final List l, final List r) { @@ -301,10 +302,10 @@ public class MergeUtils { } return Stream - .concat(l.stream(), r.stream()) - .filter(StringUtils::isNotBlank) - .distinct() - .collect(Collectors.toList()); + .concat(l.stream(), r.stream()) + .filter(StringUtils::isNotBlank) + .distinct() + .collect(Collectors.toList()); } // TODO review @@ -330,7 +331,7 @@ public class MergeUtils { } private static List unionTitle(List left, List right, - int trust) { + int trust) { if (left == null) { return right; } else if (right == null) { @@ -341,10 +342,10 @@ public class MergeUtils { List l = trust >= 0 ? 
right : left; return Stream - .concat(h.stream(), l.stream()) - .filter(Objects::isNull) - .distinct() - .collect(Collectors.toList()); + .concat(h.stream(), l.stream()) + .filter(Objects::isNull) + .distinct() + .collect(Collectors.toList()); } /** @@ -379,8 +380,8 @@ public class MergeUtils { merged.setPid(mergeLists(merged.getPid(), enrich.getPid(), trust, MergeUtils::spKeyExtractor, (p1, p2) -> p1)); merged.setDateofcollection(LocalDateTime.now().toString()); merged - .setDateoftransformation( - chooseString(merged.getDateoftransformation(), enrich.getDateoftransformation(), trust)); + .setDateoftransformation( + chooseString(merged.getDateoftransformation(), enrich.getDateoftransformation(), trust)); merged.setExtraInfo(unionDistinctLists(merged.getExtraInfo(), enrich.getExtraInfo(), trust)); // When merging records OAI provenance becomes null merged.setOaiprovenance(null); @@ -397,7 +398,7 @@ public class MergeUtils { checkArgument(Objects.equals(merge.getTarget(), enrich.getTarget()), "target ids must be equal"); checkArgument(Objects.equals(merge.getRelType(), enrich.getRelType()), "relType(s) must be equal"); checkArgument( - Objects.equals(merge.getSubRelType(), enrich.getSubRelType()), "subRelType(s) must be equal"); + Objects.equals(merge.getSubRelType(), enrich.getSubRelType()), "subRelType(s) must be equal"); checkArgument(Objects.equals(merge.getRelClass(), enrich.getRelClass()), "relClass(es) must be equal"); // merge.setProvenance(mergeLists(merge.getProvenance(), enrich.getProvenance())); @@ -408,10 +409,10 @@ public class MergeUtils { merge.setValidationDate(ModelSupport.oldest(merge.getValidationDate(), enrich.getValidationDate())); } catch (ParseException e) { throw new IllegalArgumentException(String - .format( - "invalid validation date format in relation [s:%s, t:%s]: %s", merge.getSource(), - merge.getTarget(), - merge.getValidationDate())); + .format( + "invalid validation date format in relation [s:%s, t:%s]: %s", merge.getSource(), + merge.getTarget(), + merge.getValidationDate())); } // TODO keyvalue merge @@ -425,7 +426,7 @@ public class MergeUtils { T merge = mergeOafEntityFields(original, enrich, trust); if (merge.getProcessingchargeamount() == null - || StringUtils.isBlank(merge.getProcessingchargeamount().getValue())) { + || StringUtils.isBlank(merge.getProcessingchargeamount().getValue())) { merge.setProcessingchargeamount(enrich.getProcessingchargeamount()); merge.setProcessingchargecurrency(enrich.getProcessingchargecurrency()); } @@ -457,8 +458,8 @@ public class MergeUtils { } merge - .setDateofacceptance( - mergeDateOfAcceptance(merge.getDateofacceptance(), enrich.getDateofacceptance(), trust)); + .setDateofacceptance( + mergeDateOfAcceptance(merge.getDateofacceptance(), enrich.getDateofacceptance(), trust)); merge.setPublisher(coalesce(merge.getPublisher(), enrich.getPublisher())); merge.setEmbargoenddate(coalesce(merge.getEmbargoenddate(), enrich.getEmbargoenddate())); @@ -473,7 +474,7 @@ public class MergeUtils { merge.setCoverage(unionDistinctLists(merge.getCoverage(), enrich.getCoverage(), trust)); if (enrich.getBestaccessright() != null - && new AccessRightComparator<>() + && new AccessRightComparator<>() .compare(enrich.getBestaccessright(), merge.getBestaccessright()) < 0) { merge.setBestaccessright(enrich.getBestaccessright()); } @@ -486,8 +487,8 @@ public class MergeUtils { // ok merge - .setExternalReference( - mergeExternalReference(merge.getExternalReference(), enrich.getExternalReference(), trust)); + .setExternalReference( + 
mergeExternalReference(merge.getExternalReference(), enrich.getExternalReference(), trust)); // instance enrichment or union // review instance equals => add pid to comparision @@ -495,17 +496,17 @@ public class MergeUtils { merge.setInstance(mergeInstances(merge.getInstance(), enrich.getInstance(), trust)); } else { final List enrichmentInstances = isAnEnrichment(merge) ? merge.getInstance() - : enrich.getInstance(); + : enrich.getInstance(); final List enrichedInstances = isAnEnrichment(merge) ? enrich.getInstance() - : merge.getInstance(); + : merge.getInstance(); if (isAnEnrichment(merge)) merge.setDataInfo(enrich.getDataInfo()); merge.setInstance(enrichInstances(enrichedInstances, enrichmentInstances)); } merge - .setEoscifguidelines( - mergeEosciifguidelines(merge.getEoscifguidelines(), enrich.getEoscifguidelines(), trust)); + .setEoscifguidelines( + mergeEosciifguidelines(merge.getEoscifguidelines(), enrich.getEoscifguidelines(), trust)); merge.setIsGreen(booleanOR(merge.getIsGreen(), enrich.getIsGreen())); // OK but should be list of values merge.setOpenAccessColor(coalesce(merge.getOpenAccessColor(), enrich.getOpenAccessColor())); @@ -531,7 +532,7 @@ public class MergeUtils { LocalDate enrich_date = LocalDate.parse(enrich.getValue(), DateTimeFormatter.ISO_DATE); if (enrich_date.getYear() > 1300 - && (merge_date.getYear() < 1300 || merge_date.isAfter(enrich_date))) { + && (merge_date.getYear() < 1300 || merge_date.isAfter(enrich_date))) { return enrich; } } catch (NullPointerException | DateTimeParseException e) { @@ -549,56 +550,56 @@ public class MergeUtils { private static List mergeInstances(List v1, List v2, int trust) { return mergeLists( - v1, v2, trust, - MergeUtils::instanceKeyExtractor, - MergeUtils::instanceMerger); + v1, v2, trust, + MergeUtils::instanceKeyExtractor, + MergeUtils::instanceMerger); } private static List mergeEosciifguidelines(List v1, List v2, - int trust) { + int trust) { return mergeLists( - v1, v2, trust, er -> Joiner - .on("||") - .useForNull("") - .join(er.getCode(), er.getLabel(), er.getUrl(), er.getSemanticRelation()), - (r, l) -> r); + v1, v2, trust, er -> Joiner + .on("||") + .useForNull("") + .join(er.getCode(), er.getLabel(), er.getUrl(), er.getSemanticRelation()), + (r, l) -> r); } private static List mergeExternalReference(List v1, - List v2, int trust) { + List v2, int trust) { return mergeLists( - v1, v2, trust, er -> Joiner - .on(',') - .useForNull("") - .join( - er.getSitename(), er.getLabel(), - er.getUrl(), toString(er.getQualifier()), er.getRefidentifier(), - er.getQuery(), toString(er.getDataInfo())), - (r, l) -> r); + v1, v2, trust, er -> Joiner + .on(',') + .useForNull("") + .join( + er.getSitename(), er.getLabel(), + er.getUrl(), toString(er.getQualifier()), er.getRefidentifier(), + er.getQuery(), toString(er.getDataInfo())), + (r, l) -> r); } private static String toString(DataInfo di) { return Joiner - .on(',') - .useForNull("") - .join( - di.getInvisible(), di.getInferred(), di.getDeletedbyinference(), di.getTrust(), - di.getInferenceprovenance(), toString(di.getProvenanceaction())); + .on(',') + .useForNull("") + .join( + di.getInvisible(), di.getInferred(), di.getDeletedbyinference(), di.getTrust(), + di.getInferenceprovenance(), toString(di.getProvenanceaction())); } private static String toString(Qualifier q) { return Joiner - .on(',') - .useForNull("") - .join(q.getClassid(), q.getClassname(), q.getSchemeid(), q.getSchemename()); + .on(',') + .useForNull("") + .join(q.getClassid(), q.getClassname(), q.getSchemeid(), 
q.getSchemename()); } private static String toString(StructuredProperty sp) { return Joiner - .on(',') - .useForNull("") - .join(toString(sp.getQualifier()), sp.getValue()); + .on(',') + .useForNull("") + .join(toString(sp.getQualifier()), sp.getValue()); } private static List mergeStructuredProperties(List v1, List v2, int trust) { @@ -637,17 +638,17 @@ public class MergeUtils { // 2. @@ // 3. || return String - .join( - "::", - kvKeyExtractor(i.getHostedby()), - kvKeyExtractor(i.getCollectedfrom()), - qualifierKeyExtractor(i.getAccessright()), - qualifierKeyExtractor(i.getInstancetype()), - Optional.ofNullable(i.getUrl()).map(u -> String.join("@@", u)).orElse(null), - Optional - .ofNullable(i.getPid()) - .map(pp -> pp.stream().map(MergeUtils::spKeyExtractor).collect(Collectors.joining("@@"))) - .orElse(null)); + .join( + "::", + kvKeyExtractor(i.getHostedby()), + kvKeyExtractor(i.getCollectedfrom()), + qualifierKeyExtractor(i.getAccessright()), + qualifierKeyExtractor(i.getInstancetype()), + Optional.ofNullable(i.getUrl()).map(u -> String.join("@@", u)).orElse(null), + Optional + .ofNullable(i.getPid()) + .map(pp -> pp.stream().map(MergeUtils::spKeyExtractor).collect(Collectors.joining("@@"))) + .orElse(null)); } private static Instance instanceMerger(Instance i1, Instance i2) { @@ -658,30 +659,30 @@ public class MergeUtils { i.setInstancetype(i1.getInstancetype()); i.setPid(mergeLists(i1.getPid(), i2.getPid(), 0, MergeUtils::spKeyExtractor, (sp1, sp2) -> sp1)); i - .setAlternateIdentifier( - mergeLists( - i1.getAlternateIdentifier(), i2.getAlternateIdentifier(), 0, MergeUtils::spKeyExtractor, - (sp1, sp2) -> sp1)); + .setAlternateIdentifier( + mergeLists( + i1.getAlternateIdentifier(), i2.getAlternateIdentifier(), 0, MergeUtils::spKeyExtractor, + (sp1, sp2) -> sp1)); i - .setRefereed( - Collections - .min( - Stream.of(i1.getRefereed(), i2.getRefereed()).collect(Collectors.toList()), - new RefereedComparator())); + .setRefereed( + Collections + .min( + Stream.of(i1.getRefereed(), i2.getRefereed()).collect(Collectors.toList()), + new RefereedComparator())); i - .setInstanceTypeMapping( - mergeLists( - i1.getInstanceTypeMapping(), i2.getInstanceTypeMapping(), 0, - MergeUtils::instanceTypeMappingKeyExtractor, (itm1, itm2) -> itm1)); + .setInstanceTypeMapping( + mergeLists( + i1.getInstanceTypeMapping(), i2.getInstanceTypeMapping(), 0, + MergeUtils::instanceTypeMappingKeyExtractor, (itm1, itm2) -> itm1)); i.setFulltext(selectFulltext(i1.getFulltext(), i2.getFulltext())); i.setDateofacceptance(selectOldestDate(i1.getDateofacceptance(), i2.getDateofacceptance())); i.setLicense(coalesce(i1.getLicense(), i2.getLicense())); i.setProcessingchargeamount(coalesce(i1.getProcessingchargeamount(), i2.getProcessingchargeamount())); i.setProcessingchargecurrency(coalesce(i1.getProcessingchargecurrency(), i2.getProcessingchargecurrency())); i - .setMeasures( - mergeLists(i1.getMeasures(), i2.getMeasures(), 0, MergeUtils::measureKeyExtractor, (m1, m2) -> m1)); + .setMeasures( + mergeLists(i1.getMeasures(), i2.getMeasures(), 0, MergeUtils::measureKeyExtractor, (m1, m2) -> m1)); i.setUrl(unionDistinctListOfString(i1.getUrl(), i2.getUrl())); @@ -690,14 +691,14 @@ public class MergeUtils { private static String measureKeyExtractor(Measure m) { return String - .join( - "::", - m.getId(), - m - .getUnit() - .stream() - .map(KeyValue::getKey) - .collect(Collectors.joining("::"))); + .join( + "::", + m.getId(), + m + .getUnit() + .stream() + .map(KeyValue::getKey) + .collect(Collectors.joining("::"))); } private 
static Field selectOldestDate(Field d1, Field d2) { @@ -708,16 +709,16 @@ public class MergeUtils { } return Stream - .of(d1, d2) - .min( - Comparator - .comparing( - f -> DateParserUtils - .parseDate(f.getValue()) - .toInstant() - .atZone(ZoneId.systemDefault()) - .toLocalDate())) - .orElse(d1); + .of(d1, d2) + .min( + Comparator + .comparing( + f -> DateParserUtils + .parseDate(f.getValue()) + .toInstant() + .atZone(ZoneId.systemDefault()) + .toLocalDate())) + .orElse(d1); } private static String selectFulltext(String ft1, String ft2) { @@ -732,12 +733,12 @@ public class MergeUtils { private static String instanceTypeMappingKeyExtractor(InstanceTypeMapping itm) { return String - .join( - "::", - itm.getOriginalType(), - itm.getTypeCode(), - itm.getTypeLabel(), - itm.getVocabularyName()); + .join( + "::", + itm.getOriginalType(), + itm.getTypeCode(), + itm.getTypeLabel(), + itm.getVocabularyName()); } private static String kvKeyExtractor(KeyValue kv) { @@ -754,13 +755,13 @@ public class MergeUtils { private static String spKeyExtractor(StructuredProperty sp) { return Optional - .ofNullable(sp) - .map( - s -> Joiner - .on("||") - .useForNull("") - .join(qualifierKeyExtractor(s.getQualifier()), s.getValue())) - .orElse(null); + .ofNullable(sp) + .map( + s -> Joiner + .on("||") + .useForNull("") + .join(qualifierKeyExtractor(s.getQualifier()), s.getValue())) + .orElse(null); } private static T mergeORP(T original, T enrich) { @@ -782,8 +783,8 @@ public class MergeUtils { merge.setLicense(unionDistinctLists(merge.getLicense(), enrich.getLicense(), trust)); merge.setCodeRepositoryUrl(chooseReference(merge.getCodeRepositoryUrl(), enrich.getCodeRepositoryUrl(), trust)); merge - .setProgrammingLanguage( - chooseReference(merge.getProgrammingLanguage(), enrich.getProgrammingLanguage(), trust)); + .setProgrammingLanguage( + chooseReference(merge.getProgrammingLanguage(), enrich.getProgrammingLanguage(), trust)); return merge; } @@ -797,11 +798,11 @@ public class MergeUtils { merge.setSize(chooseReference(merge.getSize(), enrich.getSize(), trust)); merge.setVersion(chooseReference(merge.getVersion(), enrich.getVersion(), trust)); merge - .setLastmetadataupdate( - chooseReference(merge.getLastmetadataupdate(), enrich.getLastmetadataupdate(), trust)); + .setLastmetadataupdate( + chooseReference(merge.getLastmetadataupdate(), enrich.getLastmetadataupdate(), trust)); merge - .setMetadataversionnumber( - chooseReference(merge.getMetadataversionnumber(), enrich.getMetadataversionnumber(), trust)); + .setMetadataversionnumber( + chooseReference(merge.getMetadataversionnumber(), enrich.getMetadataversionnumber(), trust)); merge.setGeolocation(unionDistinctLists(merge.getGeolocation(), enrich.getGeolocation(), trust)); return merge; @@ -823,26 +824,26 @@ public class MergeUtils { merged.setLegalshortname(chooseReference(merged.getLegalshortname(), enrich.getLegalshortname(), trust)); merged.setLegalname(chooseReference(merged.getLegalname(), enrich.getLegalname(), trust)); merged - .setAlternativeNames(unionDistinctLists(enrich.getAlternativeNames(), merged.getAlternativeNames(), trust)); + .setAlternativeNames(unionDistinctLists(enrich.getAlternativeNames(), merged.getAlternativeNames(), trust)); merged.setWebsiteurl(chooseReference(merged.getWebsiteurl(), enrich.getWebsiteurl(), trust)); merged.setLogourl(chooseReference(merged.getLogourl(), enrich.getLogourl(), trust)); merged.setEclegalbody(chooseReference(merged.getEclegalbody(), enrich.getEclegalbody(), trust)); 
merged.setEclegalperson(chooseReference(merged.getEclegalperson(), enrich.getEclegalperson(), trust)); merged.setEcnonprofit(chooseReference(merged.getEcnonprofit(), enrich.getEcnonprofit(), trust)); merged - .setEcresearchorganization( - chooseReference(merged.getEcresearchorganization(), enrich.getEcresearchorganization(), trust)); + .setEcresearchorganization( + chooseReference(merged.getEcresearchorganization(), enrich.getEcresearchorganization(), trust)); merged - .setEchighereducation(chooseReference(merged.getEchighereducation(), enrich.getEchighereducation(), trust)); + .setEchighereducation(chooseReference(merged.getEchighereducation(), enrich.getEchighereducation(), trust)); merged - .setEcinternationalorganizationeurinterests( - chooseReference( - merged.getEcinternationalorganizationeurinterests(), - enrich.getEcinternationalorganizationeurinterests(), trust)); + .setEcinternationalorganizationeurinterests( + chooseReference( + merged.getEcinternationalorganizationeurinterests(), + enrich.getEcinternationalorganizationeurinterests(), trust)); merged - .setEcinternationalorganization( - chooseReference( - merged.getEcinternationalorganization(), enrich.getEcinternationalorganization(), trust)); + .setEcinternationalorganization( + chooseReference( + merged.getEcinternationalorganization(), enrich.getEcinternationalorganization(), trust)); merged.setEcenterprise(chooseReference(merged.getEcenterprise(), enrich.getEcenterprise(), trust)); merged.setEcsmevalidated(chooseReference(merged.getEcsmevalidated(), enrich.getEcsmevalidated(), trust)); merged.setEcnutscode(chooseReference(merged.getEcnutscode(), enrich.getEcnutscode(), trust)); @@ -866,8 +867,8 @@ public class MergeUtils { merged.setDuration(chooseReference(merged.getDuration(), enrich.getDuration(), trust)); merged.setEcsc39(chooseReference(merged.getEcsc39(), enrich.getEcsc39(), trust)); merged - .setOamandatepublications( - chooseReference(merged.getOamandatepublications(), enrich.getOamandatepublications(), trust)); + .setOamandatepublications( + chooseReference(merged.getOamandatepublications(), enrich.getOamandatepublications(), trust)); merged.setEcarticle29_3(chooseReference(merged.getEcarticle29_3(), enrich.getEcarticle29_3(), trust)); merged.setSubjects(unionDistinctLists(merged.getSubjects(), enrich.getSubjects(), trust)); merged.setFundingtree(unionDistinctLists(merged.getFundingtree(), enrich.getFundingtree(), trust)); @@ -893,8 +894,8 @@ public class MergeUtils { } merged - .setH2020classification( - unionDistinctLists(merged.getH2020classification(), enrich.getH2020classification(), trust)); + .setH2020classification( + unionDistinctLists(merged.getH2020classification(), enrich.getH2020classification(), trust)); return merged; } @@ -921,7 +922,7 @@ public class MergeUtils { * @return list of instances possibly enriched */ private static List enrichInstances(final List toEnrichInstances, - final List enrichmentInstances) { + final List enrichmentInstances) { final List enrichmentResult = new ArrayList<>(); if (toEnrichInstances == null) { @@ -959,42 +960,42 @@ public class MergeUtils { */ private static Map toInstanceMap(final List ri) { return ri - .stream() - .filter(i -> i.getPid() != null || i.getAlternateIdentifier() != null) - .flatMap(i -> { - final List> result = new ArrayList<>(); - if (i.getPid() != null) - i - .getPid() - .stream() - .filter(MergeUtils::validPid) - .forEach(p -> result.add(new ImmutablePair<>(extractKeyFromPid(p), i))); - if (i.getAlternateIdentifier() != null) - i - 
.getAlternateIdentifier() - .stream() - .filter(MergeUtils::validPid) - .forEach(p -> result.add(new ImmutablePair<>(extractKeyFromPid(p), i))); - return result.stream(); - }) - .collect( - Collectors - .toMap( - Pair::getLeft, - Pair::getRight, - (a, b) -> a)); + .stream() + .filter(i -> i.getPid() != null || i.getAlternateIdentifier() != null) + .flatMap(i -> { + final List> result = new ArrayList<>(); + if (i.getPid() != null) + i + .getPid() + .stream() + .filter(MergeUtils::validPid) + .forEach(p -> result.add(new ImmutablePair<>(extractKeyFromPid(p), i))); + if (i.getAlternateIdentifier() != null) + i + .getAlternateIdentifier() + .stream() + .filter(MergeUtils::validPid) + .forEach(p -> result.add(new ImmutablePair<>(extractKeyFromPid(p), i))); + return result.stream(); + }) + .collect( + Collectors + .toMap( + Pair::getLeft, + Pair::getRight, + (a, b) -> a)); } private static boolean isFromDelegatedAuthority(Result r) { return Optional - .ofNullable(r.getInstance()) - .map( - instance -> instance - .stream() - .filter(i -> Objects.nonNull(i.getCollectedfrom())) - .map(i -> i.getCollectedfrom().getKey()) - .anyMatch(cfId -> IdentifierFactory.delegatedAuthorityDatasourceIds().contains(cfId))) - .orElse(false); + .ofNullable(r.getInstance()) + .map( + instance -> instance + .stream() + .filter(i -> Objects.nonNull(i.getCollectedfrom())) + .map(i -> i.getCollectedfrom().getKey()) + .anyMatch(cfId -> IdentifierFactory.delegatedAuthorityDatasourceIds().contains(cfId))) + .orElse(false); } /** @@ -1030,15 +1031,15 @@ public class MergeUtils { * @return the list */ private static List findEnrichmentsByPID(final List pids, - final Map enrichments) { + final Map enrichments) { if (pids == null || enrichments == null) return null; return pids - .stream() - .map(MergeUtils::extractKeyFromPid) - .map(enrichments::get) - .filter(Objects::nonNull) - .collect(Collectors.toList()); + .stream() + .map(MergeUtils::extractKeyFromPid) + .map(enrichments::get) + .filter(Objects::nonNull) + .collect(Collectors.toList()); } /** @@ -1049,8 +1050,8 @@ public class MergeUtils { */ private static boolean isAnEnrichment(OafEntity e) { return e.getDataInfo() != null && - e.getDataInfo().getProvenanceaction() != null - && ModelConstants.PROVENANCE_ENRICH.equalsIgnoreCase(e.getDataInfo().getProvenanceaction().getClassid()); + e.getDataInfo().getProvenanceaction() != null + && ModelConstants.PROVENANCE_ENRICH.equalsIgnoreCase(e.getDataInfo().getProvenanceaction().getClassid()); } /** @@ -1073,17 +1074,17 @@ public class MergeUtils { merge.setHostedby(firstNonNull(merge.getHostedby(), enrichment.getHostedby())); merge.setUrl(unionDistinctLists(merge.getUrl(), enrichment.getUrl(), 0)); merge - .setDistributionlocation( - firstNonNull(merge.getDistributionlocation(), enrichment.getDistributionlocation())); + .setDistributionlocation( + firstNonNull(merge.getDistributionlocation(), enrichment.getDistributionlocation())); merge.setCollectedfrom(firstNonNull(merge.getCollectedfrom(), enrichment.getCollectedfrom())); // pid and alternateId are used for matching merge.setDateofacceptance(firstNonNull(merge.getDateofacceptance(), enrichment.getDateofacceptance())); merge - .setProcessingchargeamount( - firstNonNull(merge.getProcessingchargeamount(), enrichment.getProcessingchargeamount())); + .setProcessingchargeamount( + firstNonNull(merge.getProcessingchargeamount(), enrichment.getProcessingchargeamount())); merge - .setProcessingchargecurrency( - firstNonNull(merge.getProcessingchargecurrency(), 
enrichment.getProcessingchargecurrency())); + .setProcessingchargecurrency( + firstNonNull(merge.getProcessingchargecurrency(), enrichment.getProcessingchargecurrency())); merge.setRefereed(firstNonNull(merge.getRefereed(), enrichment.getRefereed())); merge.setMeasures(unionDistinctLists(merge.getMeasures(), enrichment.getMeasures(), 0)); merge.setFulltext(firstNonNull(merge.getFulltext(), enrichment.getFulltext())); @@ -1091,14 +1092,14 @@ public class MergeUtils { private static int compareTrust(Oaf a, Oaf b) { String left = Optional - .ofNullable(a.getDataInfo()) - .map(DataInfo::getTrust) - .orElse("0.0"); + .ofNullable(a.getDataInfo()) + .map(DataInfo::getTrust) + .orElse("0.0"); String right = Optional - .ofNullable(b.getDataInfo()) - .map(DataInfo::getTrust) - .orElse("0.0"); + .ofNullable(b.getDataInfo()) + .map(DataInfo::getTrust) + .orElse("0.0"); return left.compareTo(right); } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/personentity/ExtractPerson.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/personentity/ExtractPerson.java index 06924f05a..05f083740 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/personentity/ExtractPerson.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/personentity/ExtractPerson.java @@ -346,15 +346,19 @@ public class ExtractPerson implements Serializable { .structuredProperty( op.getOrcid(), ModelConstants.ORCID, ModelConstants.ORCID_CLASSNAME, ModelConstants.DNET_PID_TYPES, ModelConstants.DNET_PID_TYPES, - OafMapperUtils.dataInfo(false, - null, - false, - false, - OafMapperUtils.qualifier(ModelConstants.SYSIMPORT_CROSSWALK_ENTITYREGISTRY, - ModelConstants.SYSIMPORT_CROSSWALK_ENTITYREGISTRY, - ModelConstants.DNET_PID_TYPES, - ModelConstants.DNET_PID_TYPES), - "0.91"))); + OafMapperUtils + .dataInfo( + false, + null, + false, + false, + OafMapperUtils + .qualifier( + ModelConstants.SYSIMPORT_CROSSWALK_ENTITYREGISTRY, + ModelConstants.SYSIMPORT_CROSSWALK_ENTITYREGISTRY, + ModelConstants.DNET_PID_TYPES, + ModelConstants.DNET_PID_TYPES), + "0.91"))); person.setDateofcollection(op.getLastModifiedDate()); person.setOriginalId(Arrays.asList(op.getOrcid())); person.setDataInfo(ORCIDDATAINFO); diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/gtr2/Gtr2PublicationsIterator.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/gtr2/Gtr2PublicationsIterator.java index 779c43712..1b1ff8db4 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/gtr2/Gtr2PublicationsIterator.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/gtr2/Gtr2PublicationsIterator.java @@ -3,6 +3,7 @@ package eu.dnetlib.dhp.collection.plugin.gtr2; import java.nio.charset.StandardCharsets; import java.time.LocalDate; +import java.time.format.DateTimeFormatter; import java.util.ArrayList; import java.util.HashMap; import java.util.Iterator; @@ -18,7 +19,6 @@ import org.dom4j.Document; import org.dom4j.DocumentException; import org.dom4j.DocumentHelper; import org.dom4j.Element; -import java.time.format.DateTimeFormatter; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -188,11 +188,11 @@ public class Gtr2PublicationsIterator implements Iterator { private Document loadURL(final String cleanUrl, final int attempt) { try { - log.debug(" * Downloading Url: {}", cleanUrl); + log.debug(" * Downloading Url: {}", 
cleanUrl);
 			final byte[] bytes = this.connector.getInputSource(cleanUrl).getBytes(StandardCharsets.UTF_8);
 			return DocumentHelper.parseText(new String(bytes));
 		} catch (final Throwable e) {
-			log.error("Error dowloading url: {}, attempt = {}", cleanUrl, attempt, e);
+			log.error("Error dowloading url: {}, attempt = {}", cleanUrl, attempt, e);
 			if (attempt >= MAX_ATTEMPTS) {
 				throw new RuntimeException("Error downloading url: " + cleanUrl, e);
 			}

From 03c262ccb981bed4d6351705026e699963c9e4fc Mon Sep 17 00:00:00 2001
From: Giambattista Bloisi
Date: Wed, 13 Nov 2024 10:56:17 +0100
Subject: [PATCH 21/73] Crossref: generate canonical openaire id for results
 in affiliation relationship

---
 .gitignore                                                | 1 +
 .../eu/dnetlib/dhp/collection/crossref/Crossref2Oaf.scala | 5 +++--
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/.gitignore b/.gitignore
index 6fafc7055..ef9144ae3 100644
--- a/.gitignore
+++ b/.gitignore
@@ -28,3 +28,4 @@ spark-warehouse
 /**/.scalafmt.conf
 /.java-version
 /dhp-shade-package/dependency-reduced-pom.xml
+/**/job.properties
diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/collection/crossref/Crossref2Oaf.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/collection/crossref/Crossref2Oaf.scala
index e7d68920b..d3a68c92e 100644
--- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/collection/crossref/Crossref2Oaf.scala
+++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/collection/crossref/Crossref2Oaf.scala
@@ -673,11 +673,12 @@ case object Crossref2Oaf {
     val doi = input.getString(0)
     val rorId = input.getString(1)
 
-    val pubId = s"50|${PidType.doi.toString.padTo(12, "_")}::${DoiCleaningRule.clean(doi)}"
+
+    val pubId = IdentifierFactory.idFromPid("50", "doi", DoiCleaningRule.clean(doi), true)
     val affId = GenerateRorActionSetJob.calculateOpenaireId(rorId)
 
     val r: Relation = new Relation
-    DoiCleaningRule.clean(doi)
+
     r.setSource(pubId)
     r.setTarget(affId)
     r.setRelType(ModelConstants.RESULT_ORGANIZATION)

From fb1f0f8850b867f758fffdf9751ec9e4d2543db5 Mon Sep 17 00:00:00 2001
From: Miriam Baglioni
Date: Thu, 7 Nov 2024 14:05:02 +0100
Subject: [PATCH 22/73] [danishfunders] added the possibility to also link to
 a specific award if present in the metadata
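
Besides the generic funder relation, an award number present in the Crossref
metadata is now also turned into a relation towards the corresponding project.
The OpenAIRE funder namespace prefixes used below are fixed-width 12-character
identifiers padded with underscores; a minimal sketch of that invariant (a
hypothetical check, not part of this change):

	// every funder namespace prefix is expected to be exactly 12 characters
	for (String prefix : new String[] { "irfd________", "cf__________", "nnf_________" }) {
		assert prefix.length() == 12 : "unexpected prefix length: " + prefix;
	}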
"10.13039/501100002808" => + generateSimpleRelationFromAward(funder, "cf__________", a => a) + val targetId = getProjectId("cf__________", "1e5e62235d094afd01cd56e65112fc63") + queue += generateRelation(sourceId, targetId, ModelConstants.IS_PRODUCED_BY) + queue += generateRelation(targetId, sourceId, ModelConstants.PRODUCES) + //Novo Nordisk Foundation (NNF) + case "10.13039/501100009708" => + generateSimpleRelationFromAward(funder, "nnf___________", a => a) + val targetId = getProjectId("nnf_________", "1e5e62235d094afd01cd56e65112fc63") + queue += generateRelation(sourceId, targetId, ModelConstants.IS_PRODUCED_BY) + queue += generateRelation(targetId, sourceId, ModelConstants.PRODUCES) + case _ => logger.debug("no match for " + funder.DOI.get) } } else { diff --git a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala index bf11ed0a8..031a04058 100644 --- a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala +++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala @@ -569,16 +569,19 @@ case object Crossref2Oaf { //Add for Danish funders //Independent Research Fund Denmark (IRFD) case "10.13039/501100004836" => + generateSimpleRelationFromAward(funder, "irfd________", a => a) val targetId = getProjectId("irfd________", "1e5e62235d094afd01cd56e65112fc63") queue += generateRelation(sourceId, targetId, ModelConstants.IS_PRODUCED_BY) queue += generateRelation(targetId, sourceId, ModelConstants.PRODUCES) //Carlsberg Foundation (CF) case "10.13039/501100002808" => + generateSimpleRelationFromAward(funder, "cf__________", a => a) val targetId = getProjectId("cf__________", "1e5e62235d094afd01cd56e65112fc63") queue += generateRelation(sourceId, targetId, ModelConstants.IS_PRODUCED_BY) queue += generateRelation(targetId, sourceId, ModelConstants.PRODUCES) //Novo Nordisk Foundation (NNF) case "10.13039/501100009708" => + generateSimpleRelationFromAward(funder, "nnf___________", a => a) val targetId = getProjectId("nnf_________", "1e5e62235d094afd01cd56e65112fc63") queue += generateRelation(sourceId, targetId, ModelConstants.IS_PRODUCED_BY) queue += generateRelation(targetId, sourceId, ModelConstants.PRODUCES) From ac0a94d62d7c34fc3953d47a9a2263ac5cfadb82 Mon Sep 17 00:00:00 2001 From: "sandro.labruzzo" Date: Wed, 13 Nov 2024 16:26:59 +0100 Subject: [PATCH 23/73] updated pubmed parser to add also ORCID id and affiliation string to authors --- .../dhp/sx/bio/pubmed/PMAffiliation.java | 39 +++ .../dnetlib/dhp/sx/bio/pubmed/PMAuthor.java | 39 +++ .../dhp/sx/bio/pubmed/PMIdentifier.java | 53 +++++ .../dnetlib/dhp/sx/bio/pubmed/PMParser2.scala | 30 ++- .../dhp/sx/bio/pubmed/PubMedToOaf.scala | 6 + .../dhp/sx/graph/bio/single_pubmed.xml | 223 +++++++----------- .../dnetlib/dhp/sx/bio/BioScholixTest.scala | 64 ++++- 7 files changed, 300 insertions(+), 154 deletions(-) create mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMAffiliation.java create mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMIdentifier.java diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMAffiliation.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMAffiliation.java new file mode 100644 index 000000000..54aba8715 --- /dev/null +++ 
---
 .../dhp/sx/bio/pubmed/PMAffiliation.java      |  39 +++
 .../dnetlib/dhp/sx/bio/pubmed/PMAuthor.java   |  39 +++
 .../dhp/sx/bio/pubmed/PMIdentifier.java       |  53 +++++
 .../dnetlib/dhp/sx/bio/pubmed/PMParser2.scala |  30 ++-
 .../dhp/sx/bio/pubmed/PubMedToOaf.scala       |   6 +
 .../dhp/sx/graph/bio/single_pubmed.xml        | 223 +++++++-----------
 .../dnetlib/dhp/sx/bio/BioScholixTest.scala   |  64 ++++-
 7 files changed, 300 insertions(+), 154 deletions(-)
 create mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMAffiliation.java
 create mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMIdentifier.java

diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMAffiliation.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMAffiliation.java
new file mode 100644
index 000000000..54aba8715
--- /dev/null
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMAffiliation.java
@@ -0,0 +1,39 @@
+package eu.dnetlib.dhp.sx.bio.pubmed;
+
+/**
+ * The type Pubmed Affiliation.
+ *
+ * @author Sandro La Bruzzo
+ */
+public class PMAffiliation {
+
+	private String name;
+
+	private PMIdentifier identifier;
+
+	public PMAffiliation() {
+
+	}
+	public PMAffiliation(String name, PMIdentifier identifier) {
+		this.name = name;
+		this.identifier = identifier;
+	}
+
+	public String getName() {
+		return name;
+	}
+
+	public PMAffiliation setName(String name) {
+		this.name = name;
+		return this;
+	}
+
+	public PMIdentifier getIdentifier() {
+		return identifier;
+	}
+
+	public PMAffiliation setIdentifier(PMIdentifier identifier) {
+		this.identifier = identifier;
+		return this;
+	}
+}
diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMAuthor.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMAuthor.java
index 68ef6459e..b0df25663 100644
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMAuthor.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMAuthor.java
@@ -12,6 +12,8 @@ public class PMAuthor implements Serializable {
 
 	private String lastName;
 	private String foreName;
+	private PMIdentifier identifier;
+	private PMAffiliation affiliation;
 
 	/**
 	 * Gets last name.
@@ -59,4 +61,41 @@
 			.format("%s, %s", this.foreName != null ? this.foreName : "", this.lastName != null ? this.lastName : "");
 	}
 
+	/**
+	 * Gets identifier.
+	 *
+	 * @return the identifier
+	 */
+	public PMIdentifier getIdentifier() {
+		return identifier;
+	}
+
+	/**
+	 * Sets identifier.
+	 *
+	 * @param identifier the identifier
+	 */
+	public void setIdentifier(PMIdentifier identifier) {
+		this.identifier = identifier;
+	}
+
+	/**
+	 * Gets affiliation.
+	 *
+	 * @return the affiliation
+	 */
+	public PMAffiliation getAffiliation() {
+		return affiliation;
+	}
+
+	/**
+	 * Sets affiliation.
+ * + * @param affiliation the affiliation + */ + public void setAffiliation(PMAffiliation affiliation) { + this.affiliation = affiliation; + } + + } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMIdentifier.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMIdentifier.java new file mode 100644 index 000000000..0c8c55e40 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMIdentifier.java @@ -0,0 +1,53 @@ +package eu.dnetlib.dhp.sx.bio.pubmed; + +public class PMIdentifier { + + private String pid; + private String type; + + + public PMIdentifier(String pid, String type) { + this.pid = cleanPid(pid); + this.type = type; + } + + public PMIdentifier() { + + } + + private String cleanPid(String pid) { + + if (pid == null) { + return null; + } + + // clean ORCID ID in the form 0000000163025705 to 0000-0001-6302-5705 + if (pid.matches("[0-9]{15}[0-9X]")) { + return pid.replaceAll("(.{4})(.{4})(.{4})(.{4})", "$1-$2-$3-$4"); + } + + // clean ORCID in the form http://orcid.org/0000-0001-8567-3543 to 0000-0001-8567-3543 + if (pid.matches("http://orcid.org/[0-9]{4}-[0-9]{4}-[0-9]{4}-[0-9]{4}")) { + return pid.replaceAll("http://orcid.org/", ""); + } + return pid; + } + + public String getPid() { + return pid; + } + + public PMIdentifier setPid(String pid) { + this.pid = cleanPid(pid); + return this; + } + + public String getType() { + return type; + } + + public PMIdentifier setType(String type) { + this.type = type; + return this; + } +} diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/pubmed/PMParser2.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/pubmed/PMParser2.scala index c9e868185..2eb4bea65 100644 --- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/pubmed/PMParser2.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/pubmed/PMParser2.scala @@ -81,6 +81,26 @@ class PMParser2 { val a = new PMAuthor a.setLastName((author \ "LastName").text) a.setForeName((author \ "ForeName").text) + val id = (author \ "Identifier").text + val idType =(author \ "Identifier" \ "@Source").text + + if(id != null && id.nonEmpty && idType != null && idType.nonEmpty) { + a.setIdentifier(new PMIdentifier(id, idType)) + } + + + val affiliation = (author \ "AffiliationInfo" \ "Affiliation").text + val affiliationId = (author \ "AffiliationInfo" \ "Identifier").text + val affiliationIdType = (author \ "AffiliationInfo" \ "Identifier" \ "@Source").text + + if(affiliation != null && affiliation.nonEmpty) { + val aff = new PMAffiliation() + aff.setName(affiliation) + if(affiliationId != null && affiliationId.nonEmpty && affiliationIdType != null && affiliationIdType.nonEmpty) { + aff.setIdentifier(new PMIdentifier(affiliationId, affiliationIdType)) + } + a.setAffiliation(aff) + } a }) .toList @@ -99,15 +119,7 @@ class PMParser2 { val authors = xml \ "MedlineCitation" \ "Article" \ "AuthorList" \ "Author" article.setAuthors( - authors - .map(author => { - val a = new PMAuthor - a.setLastName((author \ "LastName").text) - a.setForeName((author \ "ForeName").text) - a - }) - .toList - .asJava + extractAuthors(authors).asJava ) val pmId = xml \ "MedlineCitation" \ "PMID" diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/pubmed/PubMedToOaf.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/pubmed/PubMedToOaf.scala index d59d73bd0..5e14c731a 100644 --- 
a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/pubmed/PubMedToOaf.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/pubmed/PubMedToOaf.scala @@ -294,6 +294,12 @@ object PubMedToOaf { author.setName(a.getForeName) author.setSurname(a.getLastName) author.setFullname(a.getFullName) + if(a.getIdentifier != null) { + author.setPid(List(OafMapperUtils.structuredProperty(a.getIdentifier.getPid, + OafMapperUtils.qualifier(a.getIdentifier.getType,a.getIdentifier.getType,ModelConstants.DNET_PID_TYPES, ModelConstants.DNET_PID_TYPES), dataInfo)).asJava) + } + if (a.getAffiliation!= null) + author.setRawAffiliationString(List(a.getAffiliation.getName).asJava) author.setRank(index + 1) author }(collection.breakOut) diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/sx/graph/bio/single_pubmed.xml b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/sx/graph/bio/single_pubmed.xml index 4b4d860d7..c2e503f57 100644 --- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/sx/graph/bio/single_pubmed.xml +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/sx/graph/bio/single_pubmed.xml @@ -1,77 +1,56 @@ - - 37885214 + + 37318999 2024 02 - 14 + 09 2024 02 - 14 + 09
- 2752-7549 + 1522-1229 - 40 - 5 + 47 + 3 - 2023 Sep-Oct + 2023 + Sep + 01 - Journal of pediatric hematology/oncology nursing - J Pediatr Hematol Oncol Nurs + Advances in physiology education + Adv Physiol Educ - Care Needs of Parents of Children With Cancer in a Low-Middle-Income Country. + Providing the choice of in-person or videoconference attendance in a clinical physiology course may harm learning outcomes for the entire cohort. - 295-304 + 548-556 - 10.1177/27527530231193972 + 10.1152/advan.00160.2022 - Background: Mapping out actual supportive care needs assists nurses in providing holistic individualized care. This study aimed to explore the care needs of parents of children with cancer in the Philippines. Method: Guided by the Supportive Care Needs Framework (SCNF), this study used an embedded mixed-method design with the quantitative revised Cancer Patient Needs Questionnaire and qualitative semistructured interviews to describe parents' care needs and priorities. Results: Filipino parents (N = 156) of children with cancer have various care needs which could be classified along the SCNF categories-practical, informational, spiritual, physical, emotional, and physical needs as ranked from highest to lowest. A number of variables were significantly associated with care needs. Solid tumor diagnosis was associated with greater practical, emotional, and psychosocial care needs; having a child who had undergone surgery was associated with more practical and spiritual care needs; and being within one year of the child's diagnosis was associated with practical, psychosocial, and spiritual care needs. Parent priority needs included (a) addressing financial needs; (b) access to temporary housing to minimize treatment-related costs; (c) support groups among parents of children with cancer as a source of information; (d) financial and social support between members of family and partners of parents of children with cancer; and (e) using prayer to facilitate acceptance. Conclusions: Supportive care needs of parents of children with cancer are important components of care that should be given recognition to enhance holistic individualized care throughout the childhood cancer experience. + Clinical Physiology 1 and 2 are flipped classes in which students watch prerecorded videos before class. During the 3-h class, students take practice assessments, work in groups on critical thinking exercises, work through case studies, and engage in drawing exercises. Due to the COVID pandemic, these courses were transitioned from in-person classes to online classes. Despite the university's return-to-class policy, some students were reluctant to return to in-person classes; therefore during the 2021-2022 academic year, Clinical Physiology 1 and 2 were offered as flipped, hybrid courses. In a hybrid format, students either attended the synchronous class in person or online. Here we evaluate the learning outcomes and the perceptions of the learning experience for students who attended Clinical Physiology 1 and 2 either online (2020-2021) or in a hybrid format (2021-2022). In addition to exam scores, in-class surveys and end of course evaluations were compiled to describe the student experience in the flipped hybrid setting. 
Retrospective linear mixed-model regression analysis of exam scores revealed that a hybrid modality (2021-2022) was associated with lower exam scores when controlling for sex, graduate/undergraduate status, delivery method, and the order in which the courses were taken (F test: F = 8.65, df1 = 2, df2 = 179.28, P = 0.0003). In addition, being a Black Indigenous Person of Color (BIPOC) student is associated with a lower exam score, controlling for the same previous factors (F test: F = 4.23, df1 = 1, df2 = 130.28, P = 0.04), albeit with lower confidence; the BIPOC representation in this sample is small (BIPOC: n = 144; total: n = 504). There is no significant interaction between the hybrid modality and race, meaning that BIPOC and White students are both negatively affected in a hybrid flipped course. Instructors should consider carefully about offering hybrid courses and build in extra student support.NEW & NOTEWORTHY The transition from online to in-person teaching has been as challenging as the original transition to remote teaching with the onset of the pandemic. Since not all students were ready to return to the classroom, students could choose to take this course in person or online. This arrangement provided flexibility and opportunities for innovative class activities for students but introduced tradeoffs in lower test scores from the hybrid modality than fully online or fully in-person modalities. - Banayat - Aprille Campos - AC - 0000-0001-9339-9871 + Anderson + Lisa Carney + LC + 0000-0003-2261-1921 - College of Nursing, University of the Philippines Manila, Manila, Philippines. + Department of Integrative Biology and Physiology, University of Minnesota, Minneapolis, Minnesota, United States. + https://ror.org/017zqws13 - Abad - Peter James B - PJB + Jacobson + Tate + T - College of Nursing, University of the Philippines Manila, Manila, Philippines. - - - - Bonito - Sheila R - SR - - College of Nursing, University of the Philippines Manila, Manila, Philippines. - - - - Manahan - Lydia T - LT - - College of Nursing, University of the Philippines Manila, Manila, Philippines. - - - - Peralta - Arnold B - AB - - College of Nursing, University of the Philippines Manila, Manila, Philippines. + Department of Statistics, University of Minnesota, Minneapolis, Minnesota, United States. @@ -81,142 +60,98 @@ 2023 - 10 - 26 + 06 + 15
United States - J Pediatr Hematol Oncol Nurs - 9918282681506676 - 2752-7530 + Adv Physiol Educ + 100913944 + 1043-4046 IM - Child + Physiology + education + + + Retrospective Studies + + + Learning + + + Pandemics + + + COVID-19 + + + Regression Analysis + + + Students Humans - Parents - psychology + Male - Social Support + Female - Spirituality + White People - Religion + Black People - Neoplasms - therapy + Education, Distance + + + Curriculum - cancer - mixed methods - parent - pediatric - research - supportive care + flipped teaching + hybrid teaching + inequity + learning outcomes + responsive teaching - Declaration of Conflicting InterestsThe author(s) declared no potential conflicts of interest with respect to the research, authorship, and/or publication of this article.
- 2024 - 2 - 12 - 18 - 42 + 2023 + 7 + 21 + 6 + 44 2023 - 10 - 27 - 6 - 42 + 6 + 15 + 19 + 14 2023 - 10 - 27 - 3 - 43 + 6 + 15 + 12 + 53 ppublish - 37885214 - 10.1177/27527530231193972 + 37318999 + 10.1152/advan.00160.2022
- -30522158 -32769323 -34061701 -34661197 -34837091 -35035475 -35211699 -35557982 -35782783 -35795240 -35832688 -35847411 -36081602 -36081858 -36468085 -36468934 -36580086 -36589526 -36619609 -36649460 -36654909 -36655054 -36700856 -36705625 -36713939 -36714172 -36741203 -36741905 -36743825 -36788221 -36844926 -36846546 -36935776 -36946757 -36972191 -37034422 -37124311 -37152108 -37171968 -37273889 -37333905 -37387733 -37431449 -37576947 -37601162 -37711214 -37901290 -37981909 -37981945 -37982005 -38037601 -38037602 -38150730 -38274640 -38332671 -38334184 -38335456 -38349506 -38349576 -38353676 - \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/sx/bio/BioScholixTest.scala b/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/sx/bio/BioScholixTest.scala index c942ca132..4a926df01 100644 --- a/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/sx/bio/BioScholixTest.scala +++ b/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/sx/bio/BioScholixTest.scala @@ -19,9 +19,11 @@ import org.mockito.junit.jupiter.MockitoExtension import org.slf4j.LoggerFactory import java.io.{BufferedReader, InputStream, InputStreamReader} +import java.util.regex.Pattern import java.util.zip.GZIPInputStream import javax.xml.stream.XMLInputFactory import scala.collection.JavaConverters._ +import scala.collection.mutable import scala.collection.mutable.ListBuffer import scala.io.Source @@ -51,6 +53,64 @@ class BioScholixTest extends AbstractVocabularyTest { } } + @Test + def testPid(): Unit = { + val pids = List( + "0000000163025705", + "000000018494732X", + "0000000308873343", + "0000000335964515", + "0000000333457333", + "0000000335964515", + "0000000302921949", + + "http://orcid.org/0000-0001-8567-3543", + "http://orcid.org/0000-0001-7868-8528", + "0000-0001-9189-1440", + "0000-0003-3727-9247", + "0000-0001-7246-1058", + "000000033962389X", + "0000000330371470", + "0000000171236123", + "0000000272569752", + "0000000293231371", + "http://orcid.org/0000-0003-3345-7333", + "0000000340145688", + "http://orcid.org/0000-0003-4894-1689" + ) + + pids.foreach(pid => { + val pidCleaned = new PMIdentifier(pid, "ORCID").getPid + // assert pid is in the format of ORCID + println(pidCleaned) + assertTrue(pidCleaned.matches("[0-9]{4}-[0-9]{4}-[0-9]{4}-[0-9]{3}[0-9X]")) + }) + } + + def extractAffiliation(s: String): List[String] = { + val regex: String = "(.*)<\\/Affiliation>" + val pattern = Pattern.compile(regex, Pattern.MULTILINE) + val matcher = pattern.matcher(s) + val l: mutable.ListBuffer[String] = mutable.ListBuffer() + while (matcher.find()) { + l += matcher.group(1) + } + l.toList + } + + case class AuthorPID(pidType: String, pid: String) {} + + def extractAuthorIdentifier(s: String): List[AuthorPID] = { + val regex: String = "(.*)<\\/Identifier>" + val pattern = Pattern.compile(regex, Pattern.MULTILINE) + val matcher = pattern.matcher(s) + val l: mutable.ListBuffer[AuthorPID] = mutable.ListBuffer() + while (matcher.find()) { + l += AuthorPID(pidType = matcher.group(1), pid = matcher.group(2)) + } + l.toList + } + @Test def testParsingPubmed2(): Unit = { val mapper = new ObjectMapper() @@ -58,7 +118,9 @@ class BioScholixTest extends AbstractVocabularyTest { val parser = new PMParser2() val article = parser.parse(xml) - println(mapper.writerWithDefaultPrettyPrinter().writeValueAsString(article)) +// println(mapper.writerWithDefaultPrettyPrinter().writeValueAsString(article)) + + 
println(mapper.writerWithDefaultPrettyPrinter().writeValueAsString(PubMedToOaf.convert(article, vocabularies))) } From 4a3b173ca2d917c52de1671c352d1296ac211736 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 13 Nov 2024 16:27:00 +0100 Subject: [PATCH 24/73] defaults to 0000 - Unknown in case the instance type lookup in the dnet:result_typologies doesn't find a corresponding result type binding --- .../dhp/schema/oaf/utils/MergeUtils.java | 12 +--- .../raw/AbstractMdRecordToOafMapper.java | 55 ++++++++++--------- .../dhp/oa/graph/raw/OafToOafMapper.java | 4 +- .../dhp/oa/graph/raw/OdfToOafMapper.java | 4 +- 4 files changed, 36 insertions(+), 39 deletions(-) diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/MergeUtils.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/MergeUtils.java index dc76860f8..c9b235fd6 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/MergeUtils.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/MergeUtils.java @@ -74,15 +74,9 @@ public class MergeUtils { if (!vocs.vocabularyExists(ModelConstants.DNET_RESULT_TYPOLOGIES)) { return (T) mergedResult; } else { - final Qualifier expectedResultType = vocs.lookupTermBySynonym( - ModelConstants.DNET_RESULT_TYPOLOGIES, - i.getInstancetype().getClassid()); - - if (Objects.isNull(expectedResultType)) { - throw new IllegalArgumentException( - "instance type not bound to any result type in dnet:result_typologies: " + - i.getInstancetype().getClassid()); - } + final Qualifier expectedResultType = Optional + .ofNullable(vocs.lookupTermBySynonym(ModelConstants.DNET_RESULT_TYPOLOGIES, i.getInstancetype().getClassid())) + .orElse(OafMapperUtils.unknown(ModelConstants.DNET_RESULT_TYPOLOGIES, ModelConstants.DNET_RESULT_TYPOLOGIES)); // there is a clash among the result types if (!expectedResultType.getClassid().equals(mergedResult.getResulttype().getClassid())) { diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java index ba6887a2e..be84778f5 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java @@ -153,30 +153,33 @@ public abstract class AbstractMdRecordToOafMapper { final DataInfo entityInfo = prepareDataInfo(doc, this.invisible); final long lastUpdateTimestamp = new Date().getTime(); - final List instances = prepareInstances(doc, entityInfo, collectedFrom, hostedBy); + final Instance instance = prepareInstances(doc, entityInfo, collectedFrom, hostedBy); - final String type = getResultType(instances); + if (!Optional.ofNullable(instance.getInstancetype()) + .map(Qualifier::getClassid) + .filter(StringUtils::isNotBlank) + .isPresent()) { + return Lists.newArrayList(); + } - return createOafs(doc, type, instances, collectedFrom, entityInfo, lastUpdateTimestamp); + final String type = getResultType(instance); + + return createOafs(doc, type, instance, collectedFrom, entityInfo, lastUpdateTimestamp); } catch (final DocumentException e) { log.error("Error with record:\n" + xml); return Lists.newArrayList(); } } - protected String getResultType(final List instances) { - + protected String getResultType(final Instance instance) { if (this.vocs.vocabularyExists(ModelConstants.DNET_RESULT_TYPOLOGIES)) { - final String 
instanceType = instances - .stream() - .map(i -> i.getInstancetype().getClassid()) - .findFirst() - .filter(s -> !UNKNOWN.equalsIgnoreCase(s)) - .orElse("0000"); // Unknown - return Optional - .ofNullable(this.vocs.getSynonymAsQualifier(ModelConstants.DNET_RESULT_TYPOLOGIES, instanceType)) - .map(Qualifier::getClassid) - .orElse("0000"); + return Optional.ofNullable(instance.getInstancetype()) + .map(Qualifier::getClassid) + .map(instanceType -> Optional + .ofNullable(this.vocs.getSynonymAsQualifier(ModelConstants.DNET_RESULT_TYPOLOGIES, instanceType)) + .map(Qualifier::getClassid) + .orElse("0000")) + .orElse("0000"); } else { throw new IllegalStateException("Missing vocabulary: " + ModelConstants.DNET_RESULT_TYPOLOGIES); } @@ -196,12 +199,12 @@ public abstract class AbstractMdRecordToOafMapper { protected List createOafs( final Document doc, final String type, - final List instances, + final Instance instance, final KeyValue collectedFrom, final DataInfo info, final long lastUpdateTimestamp) { - final OafEntity entity = createEntity(doc, type, instances, collectedFrom, info, lastUpdateTimestamp); + final OafEntity entity = createEntity(doc, type, instance, collectedFrom, info, lastUpdateTimestamp); final Set originalId = Sets.newHashSet(entity.getOriginalId()); originalId.add(entity.getId()); @@ -234,19 +237,19 @@ public abstract class AbstractMdRecordToOafMapper { private OafEntity createEntity(final Document doc, final String type, - final List instances, + final Instance instance, final KeyValue collectedFrom, final DataInfo info, final long lastUpdateTimestamp) { switch (type.toLowerCase()) { case "publication": final Publication p = new Publication(); - populateResultFields(p, doc, instances, collectedFrom, info, lastUpdateTimestamp); + populateResultFields(p, doc, instance, collectedFrom, info, lastUpdateTimestamp); p.setJournal(prepareJournal(doc, info)); return p; case "dataset": final Dataset d = new Dataset(); - populateResultFields(d, doc, instances, collectedFrom, info, lastUpdateTimestamp); + populateResultFields(d, doc, instance, collectedFrom, info, lastUpdateTimestamp); d.setStoragedate(prepareDatasetStorageDate(doc, info)); d.setDevice(prepareDatasetDevice(doc, info)); d.setSize(prepareDatasetSize(doc, info)); @@ -257,7 +260,7 @@ public abstract class AbstractMdRecordToOafMapper { return d; case "software": final Software s = new Software(); - populateResultFields(s, doc, instances, collectedFrom, info, lastUpdateTimestamp); + populateResultFields(s, doc, instance, collectedFrom, info, lastUpdateTimestamp); s.setDocumentationUrl(prepareSoftwareDocumentationUrls(doc, info)); s.setLicense(prepareSoftwareLicenses(doc, info)); s.setCodeRepositoryUrl(prepareSoftwareCodeRepositoryUrl(doc, info)); @@ -267,7 +270,7 @@ public abstract class AbstractMdRecordToOafMapper { case "otherresearchproducts": default: final OtherResearchProduct o = new OtherResearchProduct(); - populateResultFields(o, doc, instances, collectedFrom, info, lastUpdateTimestamp); + populateResultFields(o, doc, instance, collectedFrom, info, lastUpdateTimestamp); o.setContactperson(prepareOtherResearchProductContactPersons(doc, info)); o.setContactgroup(prepareOtherResearchProductContactGroups(doc, info)); o.setTool(prepareOtherResearchProductTools(doc, info)); @@ -414,7 +417,7 @@ public abstract class AbstractMdRecordToOafMapper { private void populateResultFields( final Result r, final Document doc, - final List instances, + final Instance instance, final KeyValue collectedFrom, final DataInfo info, final 
long lastUpdateTimestamp) { @@ -448,8 +451,8 @@ public abstract class AbstractMdRecordToOafMapper { r.setExternalReference(new ArrayList<>()); // NOT PRESENT IN MDSTORES r.setProcessingchargeamount(field(doc.valueOf("//oaf:processingchargeamount"), info)); r.setProcessingchargecurrency(field(doc.valueOf("//oaf:processingchargeamount/@currency"), info)); - r.setInstance(instances); - r.setBestaccessright(OafMapperUtils.createBestAccessRights(instances)); + r.setInstance(Arrays.asList(instance)); + r.setBestaccessright(OafMapperUtils.createBestAccessRights(Arrays.asList(instance))); r.setEoscifguidelines(prepareEOSCIfGuidelines(doc, info)); } @@ -508,7 +511,7 @@ public abstract class AbstractMdRecordToOafMapper { protected abstract Qualifier prepareResourceType(Document doc, DataInfo info); - protected abstract List prepareInstances( + protected abstract Instance prepareInstances( Document doc, DataInfo info, KeyValue collectedfrom, diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java index 98da48f9e..33351e91f 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java @@ -135,7 +135,7 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper { } @Override - protected List prepareInstances( + protected Instance prepareInstances( final Document doc, final DataInfo info, final KeyValue collectedfrom, @@ -197,7 +197,7 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper { instance.getUrl().addAll(validUrl); } - return Lists.newArrayList(instance); + return instance; } /** diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java index ad61304a0..a811aad46 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java @@ -126,7 +126,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { } @Override - protected List prepareInstances( + protected Instance prepareInstances( final Document doc, final DataInfo info, final KeyValue collectedfrom, @@ -210,7 +210,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { instance.setUrl(new ArrayList<>()); instance.getUrl().addAll(validUrl); } - return Arrays.asList(instance); + return instance; } protected String trimAndDecodeUrl(String url) { From b95672b4204667f1b011a7b6ed281b7fcbb3525c Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 15 Nov 2024 09:16:18 +0100 Subject: [PATCH 25/73] mergeUtils set the result identifier when enforcing the result type --- .../dhp/schema/oaf/utils/MergeUtils.java | 451 +++++++++--------- 1 file changed, 229 insertions(+), 222 deletions(-) diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/MergeUtils.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/MergeUtils.java index c9b235fd6..c092f6035 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/MergeUtils.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/MergeUtils.java @@ -16,8 +16,6 @@ import java.util.function.Function; import java.util.stream.Collectors; import 
java.util.stream.Stream; -import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; -import eu.dnetlib.dhp.schema.common.EntityType; import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.tuple.ImmutablePair; import org.apache.commons.lang3.tuple.Pair; @@ -25,8 +23,10 @@ import org.apache.commons.lang3.tuple.Pair; import com.github.sisyphsu.dateparser.DateParserUtils; import com.google.common.base.Joiner; +import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; import eu.dnetlib.dhp.oa.merge.AuthorMerger; import eu.dnetlib.dhp.schema.common.AccessRightComparator; +import eu.dnetlib.dhp.schema.common.EntityType; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.oaf.*; @@ -46,7 +46,7 @@ public class MergeUtils { } public static T mergeGroup(Iterator oafEntityIterator, - boolean checkDelegateAuthority, VocabularyGroup vocs) { + boolean checkDelegateAuthority, VocabularyGroup vocs) { ArrayList sortedEntities = new ArrayList<>(); oafEntityIterator.forEachRemaining(sortedEntities::add); @@ -74,22 +74,29 @@ public class MergeUtils { if (!vocs.vocabularyExists(ModelConstants.DNET_RESULT_TYPOLOGIES)) { return (T) mergedResult; } else { - final Qualifier expectedResultType = Optional - .ofNullable(vocs.lookupTermBySynonym(ModelConstants.DNET_RESULT_TYPOLOGIES, i.getInstancetype().getClassid())) - .orElse(OafMapperUtils.unknown(ModelConstants.DNET_RESULT_TYPOLOGIES, ModelConstants.DNET_RESULT_TYPOLOGIES)); + final String expectedResultType = Optional + .ofNullable( + vocs + .lookupTermBySynonym( + ModelConstants.DNET_RESULT_TYPOLOGIES, i.getInstancetype().getClassid())) + .orElse(ModelConstants.ORP_DEFAULT_RESULTTYPE) + .getClassid(); // there is a clash among the result types - if (!expectedResultType.getClassid().equals(mergedResult.getResulttype().getClassid())) { - try { - String resulttype = expectedResultType.getClassid(); - if (EntityType.otherresearchproduct.toString().equals(resulttype)) { - resulttype = "other"; - } - Result result = (Result) ModelSupport.oafTypes.get(resulttype).newInstance(); - return (T) mergeResultFields(result, mergedResult); - } catch (InstantiationException | IllegalAccessException e) { - throw new IllegalStateException(e); - } + if (!expectedResultType.equals(mergedResult.getResulttype().getClassid())) { + + Result result = (Result) Optional + .ofNullable(ModelSupport.oafTypes.get(expectedResultType)) + .map(r -> { + try { + return r.newInstance(); + } catch (InstantiationException | IllegalAccessException e) { + throw new IllegalStateException(e); + } + }) + .orElse(new OtherResearchProduct()); + result.setId(mergedResult.getId()); + return (T) mergeResultFields(result, mergedResult); } else { return (T) mergedResult; } @@ -116,10 +123,10 @@ public class MergeUtils { return mergeRelation((Relation) left, (Relation) right); } else { throw new RuntimeException( - String - .format( - "MERGE_FROM_AND_GET incompatible types: %s, %s", - left.getClass().getCanonicalName(), right.getClass().getCanonicalName())); + String + .format( + "MERGE_FROM_AND_GET incompatible types: %s, %s", + left.getClass().getCanonicalName(), right.getClass().getCanonicalName())); } } @@ -158,10 +165,10 @@ public class MergeUtils { return mergeProject((Project) left, (Project) right); } else { throw new RuntimeException( - String - .format( - "MERGE_FROM_AND_GET incompatible types: %s, %s", - left.getClass().getCanonicalName(), right.getClass().getCanonicalName())); + String + .format( + 
"MERGE_FROM_AND_GET incompatible types: %s, %s", + left.getClass().getCanonicalName(), right.getClass().getCanonicalName())); } } @@ -252,7 +259,7 @@ public class MergeUtils { } private static List mergeLists(final List left, final List right, int trust, - Function keyExtractor, BinaryOperator merger) { + Function keyExtractor, BinaryOperator merger) { if (left == null || left.isEmpty()) { return right != null ? right : new ArrayList<>(); } else if (right == null || right.isEmpty()) { @@ -263,11 +270,11 @@ public class MergeUtils { List l = trust >= 0 ? right : left; return new ArrayList<>(Stream - .concat(h.stream(), l.stream()) - .filter(Objects::nonNull) - .distinct() - .collect(Collectors.toMap(keyExtractor, v -> v, merger, LinkedHashMap::new)) - .values()); + .concat(h.stream(), l.stream()) + .filter(Objects::nonNull) + .distinct() + .collect(Collectors.toMap(keyExtractor, v -> v, merger, LinkedHashMap::new)) + .values()); } private static List unionDistinctLists(final List left, final List right, int trust) { @@ -281,10 +288,10 @@ public class MergeUtils { List l = trust >= 0 ? right : left; return Stream - .concat(h.stream(), l.stream()) - .filter(Objects::nonNull) - .distinct() - .collect(Collectors.toList()); + .concat(h.stream(), l.stream()) + .filter(Objects::nonNull) + .distinct() + .collect(Collectors.toList()); } private static List unionDistinctListOfString(final List l, final List r) { @@ -295,10 +302,10 @@ public class MergeUtils { } return Stream - .concat(l.stream(), r.stream()) - .filter(StringUtils::isNotBlank) - .distinct() - .collect(Collectors.toList()); + .concat(l.stream(), r.stream()) + .filter(StringUtils::isNotBlank) + .distinct() + .collect(Collectors.toList()); } // TODO review @@ -324,7 +331,7 @@ public class MergeUtils { } private static List unionTitle(List left, List right, - int trust) { + int trust) { if (left == null) { return right; } else if (right == null) { @@ -335,10 +342,10 @@ public class MergeUtils { List l = trust >= 0 ? 
right : left; return Stream - .concat(h.stream(), l.stream()) - .filter(Objects::isNull) - .distinct() - .collect(Collectors.toList()); + .concat(h.stream(), l.stream()) + .filter(Objects::isNull) + .distinct() + .collect(Collectors.toList()); } /** @@ -373,8 +380,8 @@ public class MergeUtils { merged.setPid(mergeLists(merged.getPid(), enrich.getPid(), trust, MergeUtils::spKeyExtractor, (p1, p2) -> p1)); merged.setDateofcollection(LocalDateTime.now().toString()); merged - .setDateoftransformation( - chooseString(merged.getDateoftransformation(), enrich.getDateoftransformation(), trust)); + .setDateoftransformation( + chooseString(merged.getDateoftransformation(), enrich.getDateoftransformation(), trust)); merged.setExtraInfo(unionDistinctLists(merged.getExtraInfo(), enrich.getExtraInfo(), trust)); // When merging records OAI provenance becomes null merged.setOaiprovenance(null); @@ -391,7 +398,7 @@ public class MergeUtils { checkArgument(Objects.equals(merge.getTarget(), enrich.getTarget()), "target ids must be equal"); checkArgument(Objects.equals(merge.getRelType(), enrich.getRelType()), "relType(s) must be equal"); checkArgument( - Objects.equals(merge.getSubRelType(), enrich.getSubRelType()), "subRelType(s) must be equal"); + Objects.equals(merge.getSubRelType(), enrich.getSubRelType()), "subRelType(s) must be equal"); checkArgument(Objects.equals(merge.getRelClass(), enrich.getRelClass()), "relClass(es) must be equal"); // merge.setProvenance(mergeLists(merge.getProvenance(), enrich.getProvenance())); @@ -402,10 +409,10 @@ public class MergeUtils { merge.setValidationDate(ModelSupport.oldest(merge.getValidationDate(), enrich.getValidationDate())); } catch (ParseException e) { throw new IllegalArgumentException(String - .format( - "invalid validation date format in relation [s:%s, t:%s]: %s", merge.getSource(), - merge.getTarget(), - merge.getValidationDate())); + .format( + "invalid validation date format in relation [s:%s, t:%s]: %s", merge.getSource(), + merge.getTarget(), + merge.getValidationDate())); } // TODO keyvalue merge @@ -419,7 +426,7 @@ public class MergeUtils { T merge = mergeOafEntityFields(original, enrich, trust); if (merge.getProcessingchargeamount() == null - || StringUtils.isBlank(merge.getProcessingchargeamount().getValue())) { + || StringUtils.isBlank(merge.getProcessingchargeamount().getValue())) { merge.setProcessingchargeamount(enrich.getProcessingchargeamount()); merge.setProcessingchargecurrency(enrich.getProcessingchargecurrency()); } @@ -451,8 +458,8 @@ public class MergeUtils { } merge - .setDateofacceptance( - mergeDateOfAcceptance(merge.getDateofacceptance(), enrich.getDateofacceptance(), trust)); + .setDateofacceptance( + mergeDateOfAcceptance(merge.getDateofacceptance(), enrich.getDateofacceptance(), trust)); merge.setPublisher(coalesce(merge.getPublisher(), enrich.getPublisher())); merge.setEmbargoenddate(coalesce(merge.getEmbargoenddate(), enrich.getEmbargoenddate())); @@ -467,7 +474,7 @@ public class MergeUtils { merge.setCoverage(unionDistinctLists(merge.getCoverage(), enrich.getCoverage(), trust)); if (enrich.getBestaccessright() != null - && new AccessRightComparator<>() + && new AccessRightComparator<>() .compare(enrich.getBestaccessright(), merge.getBestaccessright()) < 0) { merge.setBestaccessright(enrich.getBestaccessright()); } @@ -480,8 +487,8 @@ public class MergeUtils { // ok merge - .setExternalReference( - mergeExternalReference(merge.getExternalReference(), enrich.getExternalReference(), trust)); + .setExternalReference( + 
mergeExternalReference(merge.getExternalReference(), enrich.getExternalReference(), trust)); // instance enrichment or union // review instance equals => add pid to comparision @@ -489,17 +496,17 @@ public class MergeUtils { merge.setInstance(mergeInstances(merge.getInstance(), enrich.getInstance(), trust)); } else { final List enrichmentInstances = isAnEnrichment(merge) ? merge.getInstance() - : enrich.getInstance(); + : enrich.getInstance(); final List enrichedInstances = isAnEnrichment(merge) ? enrich.getInstance() - : merge.getInstance(); + : merge.getInstance(); if (isAnEnrichment(merge)) merge.setDataInfo(enrich.getDataInfo()); merge.setInstance(enrichInstances(enrichedInstances, enrichmentInstances)); } merge - .setEoscifguidelines( - mergeEosciifguidelines(merge.getEoscifguidelines(), enrich.getEoscifguidelines(), trust)); + .setEoscifguidelines( + mergeEosciifguidelines(merge.getEoscifguidelines(), enrich.getEoscifguidelines(), trust)); merge.setIsGreen(booleanOR(merge.getIsGreen(), enrich.getIsGreen())); // OK but should be list of values merge.setOpenAccessColor(coalesce(merge.getOpenAccessColor(), enrich.getOpenAccessColor())); @@ -525,7 +532,7 @@ public class MergeUtils { LocalDate enrich_date = LocalDate.parse(enrich.getValue(), DateTimeFormatter.ISO_DATE); if (enrich_date.getYear() > 1300 - && (merge_date.getYear() < 1300 || merge_date.isAfter(enrich_date))) { + && (merge_date.getYear() < 1300 || merge_date.isAfter(enrich_date))) { return enrich; } } catch (NullPointerException | DateTimeParseException e) { @@ -543,56 +550,56 @@ public class MergeUtils { private static List mergeInstances(List v1, List v2, int trust) { return mergeLists( - v1, v2, trust, - MergeUtils::instanceKeyExtractor, - MergeUtils::instanceMerger); + v1, v2, trust, + MergeUtils::instanceKeyExtractor, + MergeUtils::instanceMerger); } private static List mergeEosciifguidelines(List v1, List v2, - int trust) { + int trust) { return mergeLists( - v1, v2, trust, er -> Joiner - .on("||") - .useForNull("") - .join(er.getCode(), er.getLabel(), er.getUrl(), er.getSemanticRelation()), - (r, l) -> r); + v1, v2, trust, er -> Joiner + .on("||") + .useForNull("") + .join(er.getCode(), er.getLabel(), er.getUrl(), er.getSemanticRelation()), + (r, l) -> r); } private static List mergeExternalReference(List v1, - List v2, int trust) { + List v2, int trust) { return mergeLists( - v1, v2, trust, er -> Joiner - .on(',') - .useForNull("") - .join( - er.getSitename(), er.getLabel(), - er.getUrl(), toString(er.getQualifier()), er.getRefidentifier(), - er.getQuery(), toString(er.getDataInfo())), - (r, l) -> r); + v1, v2, trust, er -> Joiner + .on(',') + .useForNull("") + .join( + er.getSitename(), er.getLabel(), + er.getUrl(), toString(er.getQualifier()), er.getRefidentifier(), + er.getQuery(), toString(er.getDataInfo())), + (r, l) -> r); } private static String toString(DataInfo di) { return Joiner - .on(',') - .useForNull("") - .join( - di.getInvisible(), di.getInferred(), di.getDeletedbyinference(), di.getTrust(), - di.getInferenceprovenance(), toString(di.getProvenanceaction())); + .on(',') + .useForNull("") + .join( + di.getInvisible(), di.getInferred(), di.getDeletedbyinference(), di.getTrust(), + di.getInferenceprovenance(), toString(di.getProvenanceaction())); } private static String toString(Qualifier q) { return Joiner - .on(',') - .useForNull("") - .join(q.getClassid(), q.getClassname(), q.getSchemeid(), q.getSchemename()); + .on(',') + .useForNull("") + .join(q.getClassid(), q.getClassname(), q.getSchemeid(), 
q.getSchemename()); } private static String toString(StructuredProperty sp) { return Joiner - .on(',') - .useForNull("") - .join(toString(sp.getQualifier()), sp.getValue()); + .on(',') + .useForNull("") + .join(toString(sp.getQualifier()), sp.getValue()); } private static List mergeStructuredProperties(List v1, List v2, int trust) { @@ -631,17 +638,17 @@ public class MergeUtils { // 2. @@ // 3. || return String - .join( - "::", - kvKeyExtractor(i.getHostedby()), - kvKeyExtractor(i.getCollectedfrom()), - qualifierKeyExtractor(i.getAccessright()), - qualifierKeyExtractor(i.getInstancetype()), - Optional.ofNullable(i.getUrl()).map(u -> String.join("@@", u)).orElse(null), - Optional - .ofNullable(i.getPid()) - .map(pp -> pp.stream().map(MergeUtils::spKeyExtractor).collect(Collectors.joining("@@"))) - .orElse(null)); + .join( + "::", + kvKeyExtractor(i.getHostedby()), + kvKeyExtractor(i.getCollectedfrom()), + qualifierKeyExtractor(i.getAccessright()), + qualifierKeyExtractor(i.getInstancetype()), + Optional.ofNullable(i.getUrl()).map(u -> String.join("@@", u)).orElse(null), + Optional + .ofNullable(i.getPid()) + .map(pp -> pp.stream().map(MergeUtils::spKeyExtractor).collect(Collectors.joining("@@"))) + .orElse(null)); } private static Instance instanceMerger(Instance i1, Instance i2) { @@ -652,30 +659,30 @@ public class MergeUtils { i.setInstancetype(i1.getInstancetype()); i.setPid(mergeLists(i1.getPid(), i2.getPid(), 0, MergeUtils::spKeyExtractor, (sp1, sp2) -> sp1)); i - .setAlternateIdentifier( - mergeLists( - i1.getAlternateIdentifier(), i2.getAlternateIdentifier(), 0, MergeUtils::spKeyExtractor, - (sp1, sp2) -> sp1)); + .setAlternateIdentifier( + mergeLists( + i1.getAlternateIdentifier(), i2.getAlternateIdentifier(), 0, MergeUtils::spKeyExtractor, + (sp1, sp2) -> sp1)); i - .setRefereed( - Collections - .min( - Stream.of(i1.getRefereed(), i2.getRefereed()).collect(Collectors.toList()), - new RefereedComparator())); + .setRefereed( + Collections + .min( + Stream.of(i1.getRefereed(), i2.getRefereed()).collect(Collectors.toList()), + new RefereedComparator())); i - .setInstanceTypeMapping( - mergeLists( - i1.getInstanceTypeMapping(), i2.getInstanceTypeMapping(), 0, - MergeUtils::instanceTypeMappingKeyExtractor, (itm1, itm2) -> itm1)); + .setInstanceTypeMapping( + mergeLists( + i1.getInstanceTypeMapping(), i2.getInstanceTypeMapping(), 0, + MergeUtils::instanceTypeMappingKeyExtractor, (itm1, itm2) -> itm1)); i.setFulltext(selectFulltext(i1.getFulltext(), i2.getFulltext())); i.setDateofacceptance(selectOldestDate(i1.getDateofacceptance(), i2.getDateofacceptance())); i.setLicense(coalesce(i1.getLicense(), i2.getLicense())); i.setProcessingchargeamount(coalesce(i1.getProcessingchargeamount(), i2.getProcessingchargeamount())); i.setProcessingchargecurrency(coalesce(i1.getProcessingchargecurrency(), i2.getProcessingchargecurrency())); i - .setMeasures( - mergeLists(i1.getMeasures(), i2.getMeasures(), 0, MergeUtils::measureKeyExtractor, (m1, m2) -> m1)); + .setMeasures( + mergeLists(i1.getMeasures(), i2.getMeasures(), 0, MergeUtils::measureKeyExtractor, (m1, m2) -> m1)); i.setUrl(unionDistinctListOfString(i1.getUrl(), i2.getUrl())); @@ -684,14 +691,14 @@ public class MergeUtils { private static String measureKeyExtractor(Measure m) { return String - .join( - "::", - m.getId(), - m - .getUnit() - .stream() - .map(KeyValue::getKey) - .collect(Collectors.joining("::"))); + .join( + "::", + m.getId(), + m + .getUnit() + .stream() + .map(KeyValue::getKey) + .collect(Collectors.joining("::"))); } private 
static Field selectOldestDate(Field d1, Field d2) { @@ -702,16 +709,16 @@ public class MergeUtils { } return Stream - .of(d1, d2) - .min( - Comparator - .comparing( - f -> DateParserUtils - .parseDate(f.getValue()) - .toInstant() - .atZone(ZoneId.systemDefault()) - .toLocalDate())) - .orElse(d1); + .of(d1, d2) + .min( + Comparator + .comparing( + f -> DateParserUtils + .parseDate(f.getValue()) + .toInstant() + .atZone(ZoneId.systemDefault()) + .toLocalDate())) + .orElse(d1); } private static String selectFulltext(String ft1, String ft2) { @@ -726,12 +733,12 @@ public class MergeUtils { private static String instanceTypeMappingKeyExtractor(InstanceTypeMapping itm) { return String - .join( - "::", - itm.getOriginalType(), - itm.getTypeCode(), - itm.getTypeLabel(), - itm.getVocabularyName()); + .join( + "::", + itm.getOriginalType(), + itm.getTypeCode(), + itm.getTypeLabel(), + itm.getVocabularyName()); } private static String kvKeyExtractor(KeyValue kv) { @@ -748,13 +755,13 @@ public class MergeUtils { private static String spKeyExtractor(StructuredProperty sp) { return Optional - .ofNullable(sp) - .map( - s -> Joiner - .on("||") - .useForNull("") - .join(qualifierKeyExtractor(s.getQualifier()), s.getValue())) - .orElse(null); + .ofNullable(sp) + .map( + s -> Joiner + .on("||") + .useForNull("") + .join(qualifierKeyExtractor(s.getQualifier()), s.getValue())) + .orElse(null); } private static T mergeORP(T original, T enrich) { @@ -776,8 +783,8 @@ public class MergeUtils { merge.setLicense(unionDistinctLists(merge.getLicense(), enrich.getLicense(), trust)); merge.setCodeRepositoryUrl(chooseReference(merge.getCodeRepositoryUrl(), enrich.getCodeRepositoryUrl(), trust)); merge - .setProgrammingLanguage( - chooseReference(merge.getProgrammingLanguage(), enrich.getProgrammingLanguage(), trust)); + .setProgrammingLanguage( + chooseReference(merge.getProgrammingLanguage(), enrich.getProgrammingLanguage(), trust)); return merge; } @@ -791,11 +798,11 @@ public class MergeUtils { merge.setSize(chooseReference(merge.getSize(), enrich.getSize(), trust)); merge.setVersion(chooseReference(merge.getVersion(), enrich.getVersion(), trust)); merge - .setLastmetadataupdate( - chooseReference(merge.getLastmetadataupdate(), enrich.getLastmetadataupdate(), trust)); + .setLastmetadataupdate( + chooseReference(merge.getLastmetadataupdate(), enrich.getLastmetadataupdate(), trust)); merge - .setMetadataversionnumber( - chooseReference(merge.getMetadataversionnumber(), enrich.getMetadataversionnumber(), trust)); + .setMetadataversionnumber( + chooseReference(merge.getMetadataversionnumber(), enrich.getMetadataversionnumber(), trust)); merge.setGeolocation(unionDistinctLists(merge.getGeolocation(), enrich.getGeolocation(), trust)); return merge; @@ -817,26 +824,26 @@ public class MergeUtils { merged.setLegalshortname(chooseReference(merged.getLegalshortname(), enrich.getLegalshortname(), trust)); merged.setLegalname(chooseReference(merged.getLegalname(), enrich.getLegalname(), trust)); merged - .setAlternativeNames(unionDistinctLists(enrich.getAlternativeNames(), merged.getAlternativeNames(), trust)); + .setAlternativeNames(unionDistinctLists(enrich.getAlternativeNames(), merged.getAlternativeNames(), trust)); merged.setWebsiteurl(chooseReference(merged.getWebsiteurl(), enrich.getWebsiteurl(), trust)); merged.setLogourl(chooseReference(merged.getLogourl(), enrich.getLogourl(), trust)); merged.setEclegalbody(chooseReference(merged.getEclegalbody(), enrich.getEclegalbody(), trust)); 
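		// chooseReference(left, right, trust) is defined elsewhere in this class and
		// is not part of this hunk; judging from the trust >= 0 ? left : right
		// convention used by unionDistinctLists above, it presumably returns the
		// value from the more trusted record and falls back to the other side when
		// that value is null.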
merged.setEclegalperson(chooseReference(merged.getEclegalperson(), enrich.getEclegalperson(), trust)); merged.setEcnonprofit(chooseReference(merged.getEcnonprofit(), enrich.getEcnonprofit(), trust)); merged - .setEcresearchorganization( - chooseReference(merged.getEcresearchorganization(), enrich.getEcresearchorganization(), trust)); + .setEcresearchorganization( + chooseReference(merged.getEcresearchorganization(), enrich.getEcresearchorganization(), trust)); merged - .setEchighereducation(chooseReference(merged.getEchighereducation(), enrich.getEchighereducation(), trust)); + .setEchighereducation(chooseReference(merged.getEchighereducation(), enrich.getEchighereducation(), trust)); merged - .setEcinternationalorganizationeurinterests( - chooseReference( - merged.getEcinternationalorganizationeurinterests(), - enrich.getEcinternationalorganizationeurinterests(), trust)); + .setEcinternationalorganizationeurinterests( + chooseReference( + merged.getEcinternationalorganizationeurinterests(), + enrich.getEcinternationalorganizationeurinterests(), trust)); merged - .setEcinternationalorganization( - chooseReference( - merged.getEcinternationalorganization(), enrich.getEcinternationalorganization(), trust)); + .setEcinternationalorganization( + chooseReference( + merged.getEcinternationalorganization(), enrich.getEcinternationalorganization(), trust)); merged.setEcenterprise(chooseReference(merged.getEcenterprise(), enrich.getEcenterprise(), trust)); merged.setEcsmevalidated(chooseReference(merged.getEcsmevalidated(), enrich.getEcsmevalidated(), trust)); merged.setEcnutscode(chooseReference(merged.getEcnutscode(), enrich.getEcnutscode(), trust)); @@ -860,8 +867,8 @@ public class MergeUtils { merged.setDuration(chooseReference(merged.getDuration(), enrich.getDuration(), trust)); merged.setEcsc39(chooseReference(merged.getEcsc39(), enrich.getEcsc39(), trust)); merged - .setOamandatepublications( - chooseReference(merged.getOamandatepublications(), enrich.getOamandatepublications(), trust)); + .setOamandatepublications( + chooseReference(merged.getOamandatepublications(), enrich.getOamandatepublications(), trust)); merged.setEcarticle29_3(chooseReference(merged.getEcarticle29_3(), enrich.getEcarticle29_3(), trust)); merged.setSubjects(unionDistinctLists(merged.getSubjects(), enrich.getSubjects(), trust)); merged.setFundingtree(unionDistinctLists(merged.getFundingtree(), enrich.getFundingtree(), trust)); @@ -887,8 +894,8 @@ public class MergeUtils { } merged - .setH2020classification( - unionDistinctLists(merged.getH2020classification(), enrich.getH2020classification(), trust)); + .setH2020classification( + unionDistinctLists(merged.getH2020classification(), enrich.getH2020classification(), trust)); return merged; } @@ -915,7 +922,7 @@ public class MergeUtils { * @return list of instances possibly enriched */ private static List enrichInstances(final List toEnrichInstances, - final List enrichmentInstances) { + final List enrichmentInstances) { final List enrichmentResult = new ArrayList<>(); if (toEnrichInstances == null) { @@ -953,42 +960,42 @@ public class MergeUtils { */ private static Map toInstanceMap(final List ri) { return ri - .stream() - .filter(i -> i.getPid() != null || i.getAlternateIdentifier() != null) - .flatMap(i -> { - final List> result = new ArrayList<>(); - if (i.getPid() != null) - i - .getPid() - .stream() - .filter(MergeUtils::validPid) - .forEach(p -> result.add(new ImmutablePair<>(extractKeyFromPid(p), i))); - if (i.getAlternateIdentifier() != null) - i - 
.getAlternateIdentifier() - .stream() - .filter(MergeUtils::validPid) - .forEach(p -> result.add(new ImmutablePair<>(extractKeyFromPid(p), i))); - return result.stream(); - }) - .collect( - Collectors - .toMap( - Pair::getLeft, - Pair::getRight, - (a, b) -> a)); + .stream() + .filter(i -> i.getPid() != null || i.getAlternateIdentifier() != null) + .flatMap(i -> { + final List> result = new ArrayList<>(); + if (i.getPid() != null) + i + .getPid() + .stream() + .filter(MergeUtils::validPid) + .forEach(p -> result.add(new ImmutablePair<>(extractKeyFromPid(p), i))); + if (i.getAlternateIdentifier() != null) + i + .getAlternateIdentifier() + .stream() + .filter(MergeUtils::validPid) + .forEach(p -> result.add(new ImmutablePair<>(extractKeyFromPid(p), i))); + return result.stream(); + }) + .collect( + Collectors + .toMap( + Pair::getLeft, + Pair::getRight, + (a, b) -> a)); } private static boolean isFromDelegatedAuthority(Result r) { return Optional - .ofNullable(r.getInstance()) - .map( - instance -> instance - .stream() - .filter(i -> Objects.nonNull(i.getCollectedfrom())) - .map(i -> i.getCollectedfrom().getKey()) - .anyMatch(cfId -> IdentifierFactory.delegatedAuthorityDatasourceIds().contains(cfId))) - .orElse(false); + .ofNullable(r.getInstance()) + .map( + instance -> instance + .stream() + .filter(i -> Objects.nonNull(i.getCollectedfrom())) + .map(i -> i.getCollectedfrom().getKey()) + .anyMatch(cfId -> IdentifierFactory.delegatedAuthorityDatasourceIds().contains(cfId))) + .orElse(false); } /** @@ -1024,15 +1031,15 @@ public class MergeUtils { * @return the list */ private static List findEnrichmentsByPID(final List pids, - final Map enrichments) { + final Map enrichments) { if (pids == null || enrichments == null) return null; return pids - .stream() - .map(MergeUtils::extractKeyFromPid) - .map(enrichments::get) - .filter(Objects::nonNull) - .collect(Collectors.toList()); + .stream() + .map(MergeUtils::extractKeyFromPid) + .map(enrichments::get) + .filter(Objects::nonNull) + .collect(Collectors.toList()); } /** @@ -1043,8 +1050,8 @@ public class MergeUtils { */ private static boolean isAnEnrichment(OafEntity e) { return e.getDataInfo() != null && - e.getDataInfo().getProvenanceaction() != null - && ModelConstants.PROVENANCE_ENRICH.equalsIgnoreCase(e.getDataInfo().getProvenanceaction().getClassid()); + e.getDataInfo().getProvenanceaction() != null + && ModelConstants.PROVENANCE_ENRICH.equalsIgnoreCase(e.getDataInfo().getProvenanceaction().getClassid()); } /** @@ -1067,17 +1074,17 @@ public class MergeUtils { merge.setHostedby(firstNonNull(merge.getHostedby(), enrichment.getHostedby())); merge.setUrl(unionDistinctLists(merge.getUrl(), enrichment.getUrl(), 0)); merge - .setDistributionlocation( - firstNonNull(merge.getDistributionlocation(), enrichment.getDistributionlocation())); + .setDistributionlocation( + firstNonNull(merge.getDistributionlocation(), enrichment.getDistributionlocation())); merge.setCollectedfrom(firstNonNull(merge.getCollectedfrom(), enrichment.getCollectedfrom())); // pid and alternateId are used for matching merge.setDateofacceptance(firstNonNull(merge.getDateofacceptance(), enrichment.getDateofacceptance())); merge - .setProcessingchargeamount( - firstNonNull(merge.getProcessingchargeamount(), enrichment.getProcessingchargeamount())); + .setProcessingchargeamount( + firstNonNull(merge.getProcessingchargeamount(), enrichment.getProcessingchargeamount())); merge - .setProcessingchargecurrency( - firstNonNull(merge.getProcessingchargecurrency(), 
enrichment.getProcessingchargecurrency())); + .setProcessingchargecurrency( + firstNonNull(merge.getProcessingchargecurrency(), enrichment.getProcessingchargecurrency())); merge.setRefereed(firstNonNull(merge.getRefereed(), enrichment.getRefereed())); merge.setMeasures(unionDistinctLists(merge.getMeasures(), enrichment.getMeasures(), 0)); merge.setFulltext(firstNonNull(merge.getFulltext(), enrichment.getFulltext())); @@ -1085,14 +1092,14 @@ public class MergeUtils { private static int compareTrust(Oaf a, Oaf b) { String left = Optional - .ofNullable(a.getDataInfo()) - .map(DataInfo::getTrust) - .orElse("0.0"); + .ofNullable(a.getDataInfo()) + .map(DataInfo::getTrust) + .orElse("0.0"); String right = Optional - .ofNullable(b.getDataInfo()) - .map(DataInfo::getTrust) - .orElse("0.0"); + .ofNullable(b.getDataInfo()) + .map(DataInfo::getTrust) + .orElse("0.0"); return left.compareTo(right); } From 5f512f510e92a717f1e536b9be9db15399d42805 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 15 Nov 2024 09:16:51 +0100 Subject: [PATCH 26/73] code formatting --- .../raw/AbstractMdRecordToOafMapper.java | 26 +++++++++++-------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java index be84778f5..881d3202c 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java @@ -155,10 +155,11 @@ public abstract class AbstractMdRecordToOafMapper { final Instance instance = prepareInstances(doc, entityInfo, collectedFrom, hostedBy); - if (!Optional.ofNullable(instance.getInstancetype()) - .map(Qualifier::getClassid) - .filter(StringUtils::isNotBlank) - .isPresent()) { + if (!Optional + .ofNullable(instance.getInstancetype()) + .map(Qualifier::getClassid) + .filter(StringUtils::isNotBlank) + .isPresent()) { return Lists.newArrayList(); } @@ -173,13 +174,16 @@ public abstract class AbstractMdRecordToOafMapper { protected String getResultType(final Instance instance) { if (this.vocs.vocabularyExists(ModelConstants.DNET_RESULT_TYPOLOGIES)) { - return Optional.ofNullable(instance.getInstancetype()) - .map(Qualifier::getClassid) - .map(instanceType -> Optional - .ofNullable(this.vocs.getSynonymAsQualifier(ModelConstants.DNET_RESULT_TYPOLOGIES, instanceType)) - .map(Qualifier::getClassid) - .orElse("0000")) - .orElse("0000"); + return Optional + .ofNullable(instance.getInstancetype()) + .map(Qualifier::getClassid) + .map( + instanceType -> Optional + .ofNullable( + this.vocs.getSynonymAsQualifier(ModelConstants.DNET_RESULT_TYPOLOGIES, instanceType)) + .map(Qualifier::getClassid) + .orElse("0000")) + .orElse("0000"); } else { throw new IllegalStateException("Missing vocabulary: " + ModelConstants.DNET_RESULT_TYPOLOGIES); } From cf7d9a32ab847d50bcafe03bc66795d2719e7d03 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 15 Nov 2024 09:17:28 +0100 Subject: [PATCH 27/73] disable autoBroadcastJoin in the cleaning workflow --- .../eu/dnetlib/dhp/oa/graph/clean/oozie_app/workflow.xml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/clean/oozie_app/workflow.xml 
b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/clean/oozie_app/workflow.xml index 2512fc5bc..01aaadae5 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/clean/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/clean/oozie_app/workflow.xml @@ -162,6 +162,7 @@ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.autoBroadcastJoinThreshold=-1 --conf spark.sql.shuffle.partitions=15000 --inputPath${graphInputPath}/publication @@ -197,6 +198,7 @@ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.autoBroadcastJoinThreshold=-1 --conf spark.sql.shuffle.partitions=8000 --inputPath${graphInputPath}/dataset @@ -232,6 +234,7 @@ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.autoBroadcastJoinThreshold=-1 --conf spark.sql.shuffle.partitions=5000 --inputPath${graphInputPath}/otherresearchproduct @@ -267,6 +270,7 @@ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.autoBroadcastJoinThreshold=-1 --conf spark.sql.shuffle.partitions=2000 --inputPath${graphInputPath}/software @@ -302,6 +306,7 @@ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.autoBroadcastJoinThreshold=-1 --conf spark.sql.shuffle.partitions=1000 --inputPath${graphInputPath}/datasource @@ -337,6 +342,7 @@ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.autoBroadcastJoinThreshold=-1 --conf spark.sql.shuffle.partitions=1000 --inputPath${graphInputPath}/organization @@ -372,6 +378,7 @@ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.autoBroadcastJoinThreshold=-1 --conf spark.sql.shuffle.partitions=2000 --inputPath${graphInputPath}/project @@ -407,6 +414,7 @@ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.autoBroadcastJoinThreshold=-1 --conf spark.sql.shuffle.partitions=2000 --inputPath${graphInputPath}/person @@ -442,6 +450,7 @@ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.autoBroadcastJoinThreshold=-1 --conf spark.sql.shuffle.partitions=20000 
--inputPath${graphInputPath}/relation From 9e439f5ecaea8c92ca7db4e62158289766fee84e Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 15 Nov 2024 12:19:26 +0100 Subject: [PATCH 28/73] map the abstracts considering both the datacite and the dc nsPrefix --- .../dhp/oa/graph/raw/OdfToOafMapper.java | 2 +- .../dnetlib/dhp/oa/graph/raw/MappersTest.java | 23 +++++ .../dhp/oa/graph/raw/odf_guidelines4.xml | 95 +++++++++++++++++++ 3 files changed, 119 insertions(+), 1 deletion(-) create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/odf_guidelines4.xml diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java index a811aad46..efe05eb68 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java @@ -319,7 +319,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { @Override protected List> prepareDescriptions(final Document doc, final DataInfo info) { - return prepareListFields(doc, "//*[local-name()='description' and ./@descriptionType='Abstract']", info); + return prepareListFields(doc, "//datacite:description[./@descriptionType='Abstract'] | //dc:description", info); } @Override diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java index 2cf3ea0c0..ea9503d17 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java @@ -906,6 +906,29 @@ class MappersTest { assertEquals("IT", p.getCountry().get(0).getClassid()); assertEquals("FR", p.getCountry().get(1).getClassid()); assertEquals("DE", p.getCountry().get(2).getClassid()); + + assertNotNull(p.getDescription()); + assertEquals(1, p.getDescription().size()); + assertNotNull(p.getDescription().get(0)); + assertTrue(StringUtils.isNotBlank(p.getDescription().get(0).getValue())); + } + + @Test + void testODFRecord_guidelines4() throws IOException { + final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("odf_guidelines4.xml"))); + final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); + + final Publication p = (Publication) list.get(0); + assertValidId(p.getId()); + assertValidId(p.getCollectedfrom().get(0).getKey()); + assertTrue(StringUtils.isNotBlank(p.getTitle().get(0).getValue())); + + assertNotNull(p.getDescription()); + assertEquals(2, p.getDescription().size()); + assertNotNull(p.getDescription().get(0)); + assertTrue(StringUtils.isNotBlank(p.getDescription().get(0).getValue())); + assertNotNull(p.getDescription().get(1)); + assertTrue(StringUtils.isNotBlank(p.getDescription().get(1).getValue())); } @Test diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/odf_guidelines4.xml b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/odf_guidelines4.xml new file mode 100644 index 000000000..4f390afb7 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/odf_guidelines4.xml @@ -0,0 +1,95 @@ + + +
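<!-- Fixture summary: an ODF record from air.unimi.it carrying an Italian and an
     English abstract; testODFRecord_guidelines4 above uses it to verify that
     prepareDescriptions now accepts abstracts under both the datacite and the
     dc nsPrefix, as per the commit message. -->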
+ od______1261::66de86a37fa980b9b8f4cefdf1c5a0c5 + oai:air.unimi.it:2434/636308 + 2020-09-04T14:36:48.411Z + od______1261 + oai:air.unimi.it:2434/636308 + 2022-10-14T14:17:28Z + com_2434_73555 + col_2434_73557 + openaire + 2024-11-07T06:17:52.454Z +
+ + + + I processi incorpati di costruzione della conoscenza nelle pratiche di cura + + + + S. Visioli + rp17381 + University of Milan + 53567 + + + L. Zannini + 0000-0001-7287-6616 + rp31993 + University of Milan + 5556 + + + 487 + 2 + 506 + 8 + METIS + + 10.30557/MT00041 + http://hdl.handle.net/2434/636308 + + + 2018 + 2018 + 2019-05-23 + + ita + journal article + In ambito educativo, il corpo è stato tradizionalmente marginalizzato o rifiutato come fonte di conoscenza. Negli ultimi decenni, la letteratura pedagogica ha manifestato crescente interesse per il ruolo che ha il corpo dell’insegnante, nel pianificare e realizzare l’insegnamento, sfidando le ideologie e le epistemologie dominanti, secondo le quali la nostra mente è la fonte primaria di apprendimento. Al contrario, una pedagogia della conoscenza incorpata (embodiment) considera il corpo centrale nel nostro conoscere (le pratiche d’insegnamento) e nel nostro essere (insegnanti). Analogamente, la letteratura delle professioni del caring si è rivelata molto attenta ai temi della conoscenza incorpata, ossia di quel sapere che è presente in un corpo, spesso in modo tacito. Le infermiere hanno da tempo esplorato il ruolo del loro corpo nei processi di costruzione della conoscenza, come anche il ruolo del corpo dello studente, e addirittura del paziente, nel conoscere la malattia. In questo paper riportiamo alcune riflessioni ed esperienze sulla conoscenza incorpata nelle pratiche assistenziali, mettendo in luce come questa, benché spesso tacita, abbia un ruolo centrale nell’aver cura delle persone malate. + In educational contexts, the body has been traditionally marginalized or rejected as a source of knowledge. In the last decades, some pedagogical contributions have shown interest for the role that the teacher’s body has in planning and implementing teaching, challenging dominant ideologies and epistemologies that tell us our minds are the primary sources of learning. Conversely, a pedagogy of embodiment makes the body central in our knowing (educational practices) and being (a teacher). Similarly, literature from the caring professions has revealed very receptive to-ward issues related to embodied knowledge, that is to say the knowledge that the body owns, which is frequently tacit. Since many years, nurses have explored the role of body in the processes of knowledge building, as well as the role of the student’s body, and even the patient’s body, in knowing the illness. In this paper we report some reflections and experiences concerning embodied knowledge in the nursing practice, highlighting that embodied knowledge, even if tacit, has a pivotal role when caring for sick people. + application/pdf + 2434/636308 + open access + + body; educational practices; embodied knowledge; nursing; teaching + + + + 413926 bytes + + + https://air.unimi.it/bitstream/2434/636308/2/Finale%20pubblicato.pdf + + https://air.unimi.it/bitstream/2434/636308/2/Finale%20pubblicato.pdf + 0001 + 2018-01-01 + OPEN + ita + + + +
\ No newline at end of file From a1297082e2df74e672512334238143542ad323ce Mon Sep 17 00:00:00 2001 From: "sandro.labruzzo" Date: Tue, 19 Nov 2024 14:57:18 +0100 Subject: [PATCH 29/73] Crossref Enhancements: -Accurate Review Type Assignment: Resolved an issue identified in ticket https://support.openaire.eu/issues/9525#note-13. When a relationship of "is-review-of" is detected, the publication type is now correctly set to "Review." -Enhanced Author Affiliation Data: Implemented Miriam's suggestion by including a new field, "RawAffiliationString," in each author entry. This additional data provides a more granular level of detail regarding author affiliations, potentially improving discoverability and research analysis. --- .../collection/crossref/Crossref2Oaf.scala | 67 ++--- .../collection/crossref/affiliationTest.json | 232 ++++++++++++++++++ .../crossref/CrossrefMappingTest.scala | 12 +- 3 files changed, 265 insertions(+), 46 deletions(-) create mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/crossref/affiliationTest.json diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/collection/crossref/Crossref2Oaf.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/collection/crossref/Crossref2Oaf.scala index e7d68920b..e15312e43 100644 --- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/collection/crossref/Crossref2Oaf.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/collection/crossref/Crossref2Oaf.scala @@ -37,7 +37,7 @@ case class mappingAuthor( family: Option[String], sequence: Option[String], ORCID: Option[String], - affiliation: Option[mappingAffiliation] + affiliation: Option[List[mappingAffiliation]] ) {} case class funderInfo(id: String, uri: String, name: String, synonym: List[String]) {} @@ -457,15 +457,14 @@ case object Crossref2Oaf { } //Mapping Author - val authorList: List[mappingAuthor] = - (json \ "author").extract[List[mappingAuthor]].filter(a => a.family.isDefined) + val authorList: List[mappingAuthor] = (json \ "author").extract[List[mappingAuthor]].filter(a => a.family.isDefined) val sorted_list = authorList.sortWith((a: mappingAuthor, b: mappingAuthor) => a.sequence.isDefined && a.sequence.get.equalsIgnoreCase("first") ) result.setAuthor(sorted_list.zipWithIndex.map { case (a, index) => - generateAuhtor(a.given.orNull, a.family.get, a.ORCID.orNull, index) + generateAuthor(a.given.orNull, a.family.get, a.ORCID.orNull, index, a.affiliation) }.asJava) // Mapping instance @@ -504,18 +503,6 @@ case object Crossref2Oaf { ) } - val is_review = json \ "relation" \ "is-review-of" \ "id" - - if (is_review != JNothing) { - instance.setInstancetype( - OafMapperUtils.qualifier( - "0015", - "peerReviewed", - ModelConstants.DNET_REVIEW_LEVELS, - ModelConstants.DNET_REVIEW_LEVELS - ) - ) - } if (doi.startsWith("10.3410") || doi.startsWith("10.12703")) instance.setHostedby( @@ -569,17 +556,24 @@ case object Crossref2Oaf { result } + + def generateIdentifier(oaf: Result, doi: String): String = { val id = DHPUtils.md5(doi.toLowerCase) s"50|doiboost____|$id" } - def generateAuhtor(given: String, family: String, orcid: String, index: Int): Author = { + private def generateAuthor(given: String, family: String, orcid: String, index: Int, affiliation: Option[List[mappingAffiliation]]): Author = { val a = new Author a.setName(given) a.setSurname(family) a.setFullname(s"$given $family") a.setRank(index + 1) + + // Adding Raw affiliation if it's defined + if (affiliation.isDefined) { + 
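      // affiliation is Crossref's per-author "affiliation" array (modelled by
      // mappingAffiliation above); only each entry's free-text name field is
      // kept, so the OAF author carries the affiliation strings exactly as
      // provided by Crossref.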
a.setRawAffiliationString(affiliation.get.map(a => a.name).asJava) + } if (StringUtils.isNotBlank(orcid)) a.setPid( List( @@ -705,11 +699,21 @@ case object Crossref2Oaf { val objectType = (json \ "type").extractOrElse[String](null) if (objectType == null) return resultList - val typology = getTypeQualifier(objectType, vocabularies) + + + // If the item has a relations is-review-of, then we force it to a peer-review + val is_review = json \ "relation" \ "is-review-of" \ "id" + var force_to_review = false + if (is_review != JNothing) { + force_to_review = true + } + + val typology = getTypeQualifier(if (force_to_review) "peer-review" else objectType, vocabularies) if (typology == null) return List() + val result = generateItemFromType(typology._2) if (result == null) return List() @@ -757,33 +761,6 @@ case object Crossref2Oaf { else resultList } - - // if (uw != null) { -// result.getCollectedfrom.add(createUnpayWallCollectedFrom()) -// val i: Instance = new Instance() -// i.setCollectedfrom(createUnpayWallCollectedFrom()) -// if (uw.best_oa_location != null) { -// -// i.setUrl(List(uw.best_oa_location.url).asJava) -// if (uw.best_oa_location.license.isDefined) { -// i.setLicense(field[String](uw.best_oa_location.license.get, null)) -// } -// -// val colour = get_unpaywall_color(uw.oa_status) -// if (colour.isDefined) { -// val a = new AccessRight -// a.setClassid(ModelConstants.ACCESS_RIGHT_OPEN) -// a.setClassname(ModelConstants.ACCESS_RIGHT_OPEN) -// a.setSchemeid(ModelConstants.DNET_ACCESS_MODES) -// a.setSchemename(ModelConstants.DNET_ACCESS_MODES) -// a.setOpenAccessRoute(colour.get) -// i.setAccessright(a) -// } -// i.setPid(result.getPid) -// result.getInstance().add(i) -// } -// } - } private def createCiteRelation(source: Result, targetPid: String, targetPidType: String): List[Relation] = { diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/crossref/affiliationTest.json b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/crossref/affiliationTest.json new file mode 100644 index 000000000..201138e45 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/crossref/affiliationTest.json @@ -0,0 +1,232 @@ +{ + "indexed": { + "date-parts": [ + [ + 2022, + 4, + 3 + ] + ], + "date-time": "2022-04-03T01:45:59Z", + "timestamp": 1648950359167 + }, + "reference-count": 0, + "publisher": "American Society of Clinical Oncology (ASCO)", + "issue": "18_suppl", + "content-domain": { + "domain": [], + "crossmark-restriction": false + }, + "short-container-title": [ + "JCO" + ], + "published-print": { + "date-parts": [ + [ + 2007, + 6, + 20 + ] + ] + }, + "abstract": " 3507 Purpose: To detect IGF-1R on circulating tumor cells (CTCs) as a biomarker in the clinical development of a monoclonal human antibody, CP-751,871, targeting IGF-1R. Experimental Design: An automated sample preparation and analysis system for enumerating CTCs (Celltracks) was adapted for detecting IGF-1R positive CTCs with a diagnostic antibody targeting a different IGF-1R epitope to CP-751,871. This assay was utilized in three phase I trials of CP-751,871 as a single agent or with chemotherapy and was validated using cell lines and blood samples from healthy volunteers and patients with metastatic carcinoma. Results: There was no interference between the analytical and therapeutic antibodies. 
CP-751,871 was well tolerated as a single agent, and in combination with docetaxel or carboplatin and paclitaxel, at doses ranging from 0.05 mg/kg to 20 mg/kg. Eighty patients were enrolled on phase 1 studies of CP-751,871, with 47 (59%) patients having CTCs detected during the study. Prior to treatment 26 patients (33%) had CTCs, with 23 having detectable IGF-1R positive CTCs. CP-751,871 alone, and CP-751,871 with cytotoxic chemotherapy, decreased CTCs and IGF-1R positive CTCs; these increased towards the end of the 21-day cycle in some patients, falling again with retreatment. CTCs were commonest in advanced hormone refractory prostate cancer (11/20). Detectable IGF-1R expression on CTCs before treatment with CP-751,871 and docetaxel was associated with a higher frequency of PSA decline by more than 50% (6/10 vs 2/8 patients). A relationship was observed between sustained falls in CTCs counts and PSA declines by more than 50%. Conclusions: IGF-1R expression is detectable by immunofluorescence on CTCs. These data support the further evaluation of CTCs in pharmacodynamic studies and patient selection, particularly in advanced prostate cancer. No significant financial relationships to disclose. ", + "DOI": "10.1200/jco.2007.25.18_suppl.3507", + "type": "journal-article", + "created": { + "date-parts": [ + [ + 2020, + 3, + 6 + ] + ], + "date-time": "2020-03-06T20:50:42Z", + "timestamp": 1583527842000 + }, + "page": "3507-3507", + "source": "Crossref", + "is-referenced-by-count": 0, + "title": [ + "Circulating tumor cells expressing the insulin growth factor-1 receptor (IGF-1R): Method of detection, incidence and potential applications" + ], + "prefix": "10.1200", + "volume": "25", + "author": [ + { + "given": "J. S.", + "family": "de Bono", + "sequence": "first", + "affiliation": [ + { + "name": "Royal Marsden Hospital, Surrey, United Kingdom; Mayo Clinic, Rochester, MN; McGill University & Lady Davis Research Institute, Montreal, PQ, Canada; Pfizer Global Research & Development, New London, CT; Immunicon Corporation, Huntingdon Valley, PA" + } + ] + }, + { + "given": "A.", + "family": "Adjei", + "sequence": "additional", + "affiliation": [ + { + "name": "Royal Marsden Hospital, Surrey, United Kingdom; Mayo Clinic, Rochester, MN; McGill University & Lady Davis Research Institute, Montreal, PQ, Canada; Pfizer Global Research & Development, New London, CT; Immunicon Corporation, Huntingdon Valley, PA" + } + ] + }, + { + "given": "G.", + "family": "Attard", + "sequence": "additional", + "affiliation": [ + { + "name": "Royal Marsden Hospital, Surrey, United Kingdom; Mayo Clinic, Rochester, MN; McGill University & Lady Davis Research Institute, Montreal, PQ, Canada; Pfizer Global Research & Development, New London, CT; Immunicon Corporation, Huntingdon Valley, PA" + } + ] + }, + { + "given": "M.", + "family": "Pollak", + "sequence": "additional", + "affiliation": [ + { + "name": "Royal Marsden Hospital, Surrey, United Kingdom; Mayo Clinic, Rochester, MN; McGill University & Lady Davis Research Institute, Montreal, PQ, Canada; Pfizer Global Research & Development, New London, CT; Immunicon Corporation, Huntingdon Valley, PA" + } + ] + }, + { + "given": "P.", + "family": "Fong", + "sequence": "additional", + "affiliation": [ + { + "name": "Royal Marsden Hospital, Surrey, United Kingdom; Mayo Clinic, Rochester, MN; McGill University & Lady Davis Research Institute, Montreal, PQ, Canada; Pfizer Global Research & Development, New London, CT; Immunicon Corporation, Huntingdon Valley, PA" + } + ] + }, + 
{ + "given": "P.", + "family": "Haluska", + "sequence": "additional", + "affiliation": [ + { + "name": "Royal Marsden Hospital, Surrey, United Kingdom; Mayo Clinic, Rochester, MN; McGill University & Lady Davis Research Institute, Montreal, PQ, Canada; Pfizer Global Research & Development, New London, CT; Immunicon Corporation, Huntingdon Valley, PA" + } + ] + }, + { + "given": "L.", + "family": "Roberts", + "sequence": "additional", + "affiliation": [ + { + "name": "Royal Marsden Hospital, Surrey, United Kingdom; Mayo Clinic, Rochester, MN; McGill University & Lady Davis Research Institute, Montreal, PQ, Canada; Pfizer Global Research & Development, New London, CT; Immunicon Corporation, Huntingdon Valley, PA" + } + ] + }, + { + "given": "D.", + "family": "Chainese", + "sequence": "additional", + "affiliation": [ + { + "name": "Royal Marsden Hospital, Surrey, United Kingdom; Mayo Clinic, Rochester, MN; McGill University & Lady Davis Research Institute, Montreal, PQ, Canada; Pfizer Global Research & Development, New London, CT; Immunicon Corporation, Huntingdon Valley, PA" + } + ] + }, + { + "given": "L.", + "family": "Terstappen", + "sequence": "additional", + "affiliation": [ + { + "name": "Royal Marsden Hospital, Surrey, United Kingdom; Mayo Clinic, Rochester, MN; McGill University & Lady Davis Research Institute, Montreal, PQ, Canada; Pfizer Global Research & Development, New London, CT; Immunicon Corporation, Huntingdon Valley, PA" + } + ] + }, + { + "given": "A.", + "family": "Gualberto", + "sequence": "additional", + "affiliation": [ + { + "name": "Royal Marsden Hospital, Surrey, United Kingdom; Mayo Clinic, Rochester, MN; McGill University & Lady Davis Research Institute, Montreal, PQ, Canada; Pfizer Global Research & Development, New London, CT; Immunicon Corporation, Huntingdon Valley, PA" + } + ] + } + ], + "member": "233", + "container-title": [ + "Journal of Clinical Oncology" + ], + "original-title": [], + "language": "en", + "deposited": { + "date-parts": [ + [ + 2020, + 3, + 6 + ] + ], + "date-time": "2020-03-06T20:51:03Z", + "timestamp": 1583527863000 + }, + "score": 1, + "resource": { + "primary": { + "URL": "http://ascopubs.org/doi/10.1200/jco.2007.25.18_suppl.3507" + } + }, + "subtitle": [], + "short-title": [], + "issued": { + "date-parts": [ + [ + 2007, + 6, + 20 + ] + ] + }, + "references-count": 0, + "journal-issue": { + "issue": "18_suppl", + "published-print": { + "date-parts": [ + [ + 2007, + 6, + 20 + ] + ] + } + }, + "alternative-id": [ + "10.1200/jco.2007.25.18_suppl.3507" + ], + "URL": "http://dx.doi.org/10.1200/jco.2007.25.18_suppl.3507", + "relation": {}, + "ISSN": [ + "0732-183X", + "1527-7755" + ], + "issn-type": [ + { + "value": "0732-183X", + "type": "print" + }, + { + "value": "1527-7755", + "type": "electronic" + } + ], + "subject": [], + "published": { + "date-parts": [ + [ + 2007, + 6, + 20 + ] + ] + } +} \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/collection/crossref/CrossrefMappingTest.scala b/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/collection/crossref/CrossrefMappingTest.scala index c3ea884eb..f6f71ca66 100644 --- a/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/collection/crossref/CrossrefMappingTest.scala +++ b/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/collection/crossref/CrossrefMappingTest.scala @@ -25,8 +25,18 @@ class CrossrefMappingTest extends AbstractVocabularyTest { val input = 
      IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/collection/crossref/issn_pub.json"), "utf-8")
-    println(Crossref2Oaf.convert(input, vocabularies, TransformationType.All))
+    Crossref2Oaf.convert(input, vocabularies, TransformationType.All).foreach(record => {
+      println(mapper.writerWithDefaultPrettyPrinter().writeValueAsString(record))
+    })
   }
+
+  @Test
+  def mappingAffiliation(): Unit = {
+    val input =
+      IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/collection/crossref/affiliationTest.json"), "utf-8")
+    val data = Crossref2Oaf.convert(input, vocabularies, TransformationType.OnlyResult)
+    println(mapper.writerWithDefaultPrettyPrinter().writeValueAsString(data.head))
+  }
 }

From 5d344323983e5aa6cff4cd02557bf40d0dc6c5dd Mon Sep 17 00:00:00 2001
From: Claudio Atzori
Date: Tue, 19 Nov 2024 15:12:04 +0100
Subject: [PATCH 30/73] align MergeUtils with beta branch

---
 .../dhp/schema/oaf/utils/MergeUtils.java      | 42 +++++++++----------
 1 file changed, 21 insertions(+), 21 deletions(-)

diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/MergeUtils.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/MergeUtils.java
index cd8506583..c092f6035 100644
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/MergeUtils.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/MergeUtils.java
@@ -74,29 +74,29 @@ public class MergeUtils {
 		if (!vocs.vocabularyExists(ModelConstants.DNET_RESULT_TYPOLOGIES)) {
 			return (T) mergedResult;
 		} else {
-			final Qualifier expectedResultType = vocs
-				.lookupTermBySynonym(
-					ModelConstants.DNET_RESULT_TYPOLOGIES,
-					i.getInstancetype().getClassid());
-
-			if (Objects.isNull(expectedResultType)) {
-				throw new IllegalArgumentException(
-					"instance type not bound to any result type in dnet:result_typologies: " +
-						i.getInstancetype().getClassid());
-			}
+			final String expectedResultType = Optional
+				.ofNullable(
+					vocs
+						.lookupTermBySynonym(
+							ModelConstants.DNET_RESULT_TYPOLOGIES, i.getInstancetype().getClassid()))
+				.orElse(ModelConstants.ORP_DEFAULT_RESULTTYPE)
+				.getClassid();

 			// there is a clash among the result types
-			if (!expectedResultType.getClassid().equals(mergedResult.getResulttype().getClassid())) {
-				try {
-					String resulttype = expectedResultType.getClassid();
-					if (EntityType.otherresearchproduct.toString().equals(resulttype)) {
-						resulttype = "other";
-					}
-					Result result = (Result) ModelSupport.oafTypes.get(resulttype).newInstance();
-					return (T) mergeResultFields(result, mergedResult);
-				} catch (InstantiationException | IllegalAccessException e) {
-					throw new IllegalStateException(e);
-				}
+			if (!expectedResultType.equals(mergedResult.getResulttype().getClassid())) {
+
+				Result result = (Result) Optional
+					.ofNullable(ModelSupport.oafTypes.get(expectedResultType))
+					.map(r -> {
+						try {
+							return r.newInstance();
+						} catch (InstantiationException | IllegalAccessException e) {
+							throw new IllegalStateException(e);
+						}
+					})
+					.orElse(new OtherResearchProduct());
+				result.setId(mergedResult.getId());
+				return (T) mergeResultFields(result, mergedResult);
 			} else {
 				return (T) mergedResult;
 			}
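The fallback introduced above, reduced to its bare pattern as a minimal, self-contained
sketch (the registry map and the Result classes below are illustrative stand-ins, not the
project's real ModelSupport.oafTypes or schema classes):

	import java.util.HashMap;
	import java.util.Map;
	import java.util.Optional;

	class ResultTypeFallbackSketch {

		static class Result {}
		static class Publication extends Result {}
		static class OtherResearchProduct extends Result {}

		// stand-in registry: result type id -> concrete class to instantiate
		static final Map<String, Class<? extends Result>> TYPES = new HashMap<>();
		static {
			TYPES.put("publication", Publication.class);
		}

		// unknown type ids no longer throw: they fall back to a default instance,
		// mirroring the orElse(new OtherResearchProduct()) in the hunk above
		static Result resolve(String expectedResultType) {
			return Optional
				.ofNullable(TYPES.get(expectedResultType))
				.map(c -> {
					try {
						return (Result) c.getDeclaredConstructor().newInstance();
					} catch (ReflectiveOperationException e) {
						throw new IllegalStateException(e);
					}
				})
				.orElseGet(OtherResearchProduct::new);
		}
	}

From 4e55ddc547451c18b9e14cce8244015e6825b3cf Mon Sep 17 00:00:00 2001
From: Claudio Atzori
Date: Tue, 19 Nov 2024 16:50:42 +0100
Subject: [PATCH 31/73] [PubMed aggregation] storing contents into
 mdStoreVersion/store

---
 .../dnetlib/dhp/sx/bio/ebi/SparkCreatePubmedDump.scala | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)
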
diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkCreatePubmedDump.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkCreatePubmedDump.scala
index 1bdd2a4bc..adac9ffb6 100644
--- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkCreatePubmedDump.scala
+++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkCreatePubmedDump.scala
@@ -2,9 +2,13 @@ package eu.dnetlib.dhp.sx.bio.ebi

 import com.fasterxml.jackson.databind.ObjectMapper
 import eu.dnetlib.dhp.application.AbstractScalaApplication
+import eu.dnetlib.dhp.common.Constants
+import eu.dnetlib.dhp.common.Constants.{MDSTORE_DATA_PATH, MDSTORE_SIZE_PATH}
 import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup
 import eu.dnetlib.dhp.schema.mdstore.MDStoreVersion
 import eu.dnetlib.dhp.sx.bio.pubmed.{PMArticle, PMParser2, PubMedToOaf}
+import eu.dnetlib.dhp.transformation.TransformSparkJobNode
+import eu.dnetlib.dhp.utils.DHPUtils.writeHdfsFile
 import eu.dnetlib.dhp.utils.ISLookupClientFactory
 import org.apache.spark.sql.{Encoder, Encoders, SparkSession}
 import org.slf4j.{Logger, LoggerFactory}
@@ -82,7 +86,10 @@ class SparkCreatePubmedDump(propertyPath: String, args: Array[String], log: Logg
         .write
         .option("compression", "gzip")
         .mode("overwrite")
-        .text(targetPath)
+        .text(targetPath + MDSTORE_DATA_PATH)
+
+      val mdStoreSize = spark.read.text(targetPath + MDSTORE_DATA_PATH).count
+      writeHdfsFile(spark.sparkContext.hadoopConfiguration, "" + mdStoreSize, targetPath + MDSTORE_SIZE_PATH)
   }
 }

From 496007188a720b18ce8301b6050c6e0b924522d1 Mon Sep 17 00:00:00 2001
From: "sandro.labruzzo"
Date: Wed, 20 Nov 2024 09:50:09 +0100
Subject: [PATCH 32/73] Added assertion on CrossrefMappingTest

---
 .../crossref/CrossrefMappingTest.scala        | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/collection/crossref/CrossrefMappingTest.scala b/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/collection/crossref/CrossrefMappingTest.scala
index f6f71ca66..12ca14ba1 100644
--- a/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/collection/crossref/CrossrefMappingTest.scala
+++ b/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/collection/crossref/CrossrefMappingTest.scala
@@ -3,12 +3,15 @@ package eu.dnetlib.dhp.collection.crossref
 import com.fasterxml.jackson.databind.ObjectMapper
 import eu.dnetlib.dhp.aggregation.AbstractVocabularyTest
 import eu.dnetlib.dhp.collection.crossref.Crossref2Oaf.TransformationType
+import eu.dnetlib.dhp.schema.oaf.Publication
 import org.apache.commons.io.IOUtils
-import org.junit.jupiter.api.{BeforeEach, Test}
+import org.junit.jupiter.api.{Assertions, BeforeEach, Test}
 import org.junit.jupiter.api.extension.ExtendWith
 import org.mockito.junit.jupiter.MockitoExtension
 import org.slf4j.{Logger, LoggerFactory}

+import scala.collection.JavaConverters.asScalaBufferConverter
+
 @ExtendWith(Array(classOf[MockitoExtension]))
 class CrossrefMappingTest extends AbstractVocabularyTest {

@@ -26,7 +29,7 @@ class CrossrefMappingTest extends AbstractVocabularyTest {
       IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/collection/crossref/issn_pub.json"), "utf-8")

     Crossref2Oaf.convert(input, vocabularies, TransformationType.All).foreach(record => {
-      println(mapper.writerWithDefaultPrettyPrinter().writeValueAsString(record))
+      Assertions.assertNotNull(record)
     })
   }

@@ -37,6 +40,16 @@ class CrossrefMappingTest extends AbstractVocabularyTest {
     val input =
       IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/collection/crossref/affiliationTest.json"), "utf-8")
     val data = Crossref2Oaf.convert(input, vocabularies, TransformationType.OnlyResult)
+    data.foreach(record => {
+      Assertions.assertNotNull(record)
+      Assertions.assertTrue(record.isInstanceOf[Publication])
+      val publication = record.asInstanceOf[Publication]
+      publication.getAuthor.asScala.foreach(author => {
+        Assertions.assertNotNull(author.getRawAffiliationString)
+        Assertions.assertTrue(author.getRawAffiliationString.size() > 0)
+
+      })
+    })
     println(mapper.writerWithDefaultPrettyPrinter().writeValueAsString(data.head))
   }
 }

From 15227f82b8acecaf7b51cddba83d68bde5f10169 Mon Sep 17 00:00:00 2001
From: Claudio Atzori
Date: Wed, 20 Nov 2024 15:52:40 +0100
Subject: [PATCH 33/73] added related author's given name and family name in
 the solr json payload serialisation

---
 .../model/ProvisionModelSupport.java          | 23 +++++++++++++++++--
 pom.xml                                       |  2 +-
 2 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/ProvisionModelSupport.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/ProvisionModelSupport.java
index 738d75189..0da0f6955 100644
--- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/ProvisionModelSupport.java
+++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/ProvisionModelSupport.java
@@ -5,6 +5,7 @@ import java.io.StringReader;
 import java.util.*;
 import java.util.stream.Collectors;

+import eu.dnetlib.dhp.schema.solr.PersonTopic;
 import org.apache.commons.lang3.StringUtils;
 import org.dom4j.Document;
 import org.dom4j.DocumentException;
@@ -39,7 +40,6 @@ import eu.dnetlib.dhp.schema.solr.OpenAccessColor;
 import eu.dnetlib.dhp.schema.solr.OpenAccessRoute;
 import eu.dnetlib.dhp.schema.solr.Organization;
 import eu.dnetlib.dhp.schema.solr.Person;
-import eu.dnetlib.dhp.schema.solr.PersonTopic;
 import eu.dnetlib.dhp.schema.solr.Pid;
 import eu.dnetlib.dhp.schema.solr.Project;
 import eu.dnetlib.dhp.schema.solr.Result;
@@ -174,6 +174,8 @@ public class ProvisionModelSupport {
 			&& StringUtils.isNotBlank(relation.getValidationDate())) {
 			rr.setValidationDate(relation.getValidationDate());
 		}
+		rr.setGivenName(re.getGivenName());
+		rr.setFamilyName(re.getFamilyName());

 		return rr;
 	}
@@ -208,11 +210,28 @@ public class ProvisionModelSupport {
 		ps.setAlternativeNames(p.getAlternativeNames());
 		ps.setBiography(p.getBiography());
 		ps.setConsent(p.getConsent());
-		// ps.setSubject(...));
+		ps.setSubject(mapPersonTopics(p.getSubject()));

 		return ps;
 	}

+	private static List<PersonTopic> mapPersonTopics(List<eu.dnetlib.dhp.schema.oaf.PersonTopic> subjects) {
+		return Optional
+			.ofNullable(subjects)
+			.map(ss -> ss
+				.stream()
+				.map(ProvisionModelSupport::mapPersonTopic)
+				.collect(Collectors.toList()))
+			.orElse(null);
+	}
+
+	private static PersonTopic mapPersonTopic(eu.dnetlib.dhp.schema.oaf.PersonTopic pt) {
+		PersonTopic topic = new PersonTopic();
+		topic.setValue(pt.getValue());
+		topic.setSchema(pt.getSchema());
+		topic.setFromYear(pt.getFromYear());
+		topic.setToYear(pt.getToYear());
+		return topic;
+	}
+
 	private static Funding mapFunding(List<String> fundingtree, VocabularyGroup vocs) {
 		SAXReader reader = new SAXReader();
 		return Optional
diff --git a/pom.xml b/pom.xml
index 9480ddfc0..033d88b0b 100644
--- a/pom.xml
+++ b/pom.xml
@@ -937,7 +937,7 @@
 		1.1.3
 		1.7
 		1.0.7
-		[9.0.0]
+		[10.0.0]
 		cdh5.9.2
 		3.5
 		11.0.2

From e5b04e61ff8f7f7b1d80f607e0fcb4da7c7c9f37 Mon Sep 17 00:00:00 2001
From: Miriam Baglioni
Date: Thu, 21 Nov 2024 10:20:12 +0100
Subject: [PATCH 34/73] [CommunityPatents] extends the community propagation
 considering also the results of type patents linked with an isRelatedTo
 semantics

---
 .../main/java/eu/dnetlib/dhp/api/Utils.java   |  2 +-
 .../PrepareResultCommunitySetStep1.java       | 51 ++++++++++++++++---
 .../PrepareResultCommunitySetStep2.java       | 18 ++-----
 3 files changed, 49 insertions(+), 22 deletions(-)

diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/Utils.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/Utils.java
index 6079da365..1e39d99c3 100644
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/Utils.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/Utils.java
@@ -171,7 +171,7 @@ public class Utils implements Serializable {
 	public static List<String> getCommunityIdList(String baseURL) throws IOException {
 		return getValidCommunities(baseURL)
 			.stream()
-			.map(community -> community.getId())
+			.map(CommunityModel::getId)
 			.collect(Collectors.toList());
 	}

diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep1.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep1.java
index aede9ef05..ff496bb87 100644
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep1.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep1.java
@@ -3,11 +3,14 @@ package eu.dnetlib.dhp.resulttocommunityfromsemrel;

 import static eu.dnetlib.dhp.PropagationConstant.*;
 import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession;
+import static java.lang.String.join;

 import java.io.IOException;
 import java.util.Arrays;
+import java.util.Collections;
 import java.util.List;

+import eu.dnetlib.dhp.schema.common.ModelConstants;
 import org.apache.commons.io.IOUtils;
 import org.apache.spark.SparkConf;
 import org.apache.spark.sql.*;
@@ -45,7 +48,7 @@ public class PrepareResultCommunitySetStep1 {
 	/**
 	 * a dataset for example could be linked to more than one publication. For each publication linked to that dataset
-	 * the previous query will produce a row: targetId set of community context the target could possibly inherit with
+	 * the previous query will produce a row: targetId, set of community context the target could possibly inherit. With
 	 * the following query there will be a single row for each result linked to more than one result of the result type
 	 * currently being used
 	 */
@@ -56,6 +59,16 @@ public class PrepareResultCommunitySetStep1 {
 		+ "where length(co) > 0 "
 		+ "group by resultId";

+	private static final String RESULT_CONTEXT_QUERY_TEMPLATE_IS_RELATED_TO = "select target resultId, community_context "
+		+ "from (select id, collect_set(co.id) community_context "
+		+ "      from result "
+		+ "      lateral view explode (context) c as co "
+		+ "      where datainfo.deletedbyinference = false %s "
+		+ "      and array_contains(instance.instancetype.classname, 'Patent') group by id) p "
+		+ "    JOIN "
+		+ "    (select source, target from relation "
+		+ "    where datainfo.deletedbyinference = false %s ) r ON p.id = r.source";
+
 	public static void main(String[] args) throws Exception {
 		String jsonConfiguration = IOUtils
 			.toString(
@@ -82,15 +95,20 @@ public class PrepareResultCommunitySetStep1 {
 		SparkConf conf = new SparkConf();
 		conf.set("hive.metastore.uris", parser.get("hive_metastore_uris"));

-		final List<String> allowedsemrel = Arrays.asList(parser.get("allowedsemrels").split(";"));
+		final String allowedsemrel = join(",", Arrays.stream(parser.get("allowedsemrels").split(";"))
+			.map(value -> "'" + value.toLowerCase() + "'")
+			.toArray(String[]::new));
+
 		log.info("allowedSemRel: {}", new Gson().toJson(allowedsemrel));

 		final String baseURL = parser.get("baseURL");
 		log.info("baseURL: {}", baseURL);

-		final List<String> communityIdList = getCommunityList(baseURL);
-		log.info("communityIdList: {}", new Gson().toJson(communityIdList));
+		final String communityIdList = join(",", getCommunityList(baseURL).stream()
+			.map(value -> "'" + value.toLowerCase() + "'")
+			.toArray(String[]::new));
+
+		log.info("communityIdList: {}", new Gson().toJson(communityIdList));

 		final String resultType = resultClassName.substring(resultClassName.lastIndexOf(".") + 1).toLowerCase();
 		log.info("resultType: {}", resultType);
@@ -118,10 +136,10 @@ public class PrepareResultCommunitySetStep1 {
 		SparkSession spark,
 		String inputPath,
 		String outputPath,
-		List<String> allowedsemrel,
+		String allowedsemrel,
 		Class<R> resultClazz,
 		String resultType,
-		List<String> communityIdList) {
+		String communityIdList) {

 		final String inputResultPath = inputPath + "/" + resultType;
 		log.info("Reading Graph table from: {}", inputResultPath);
@@ -141,8 +159,15 @@
 		String resultContextQuery = String
 			.format(
 				RESULT_CONTEXT_QUERY_TEMPLATE,
-				getConstraintList(" lower(co.id) = '", communityIdList),
-				getConstraintList(" lower(relClass) = '", allowedsemrel));
+				" lower(co.id) IN " + communityIdList,
+				" AND lower(relClass) IN " + allowedsemrel);
+
+		String resultContextQueryIsRelatedTo = String
+			.format(
+				RESULT_CONTEXT_QUERY_TEMPLATE_IS_RELATED_TO,
+				" AND lower(co.id) IN " + communityIdList,
+				"AND lower(relClass) = '" +
+					ModelConstants.IS_RELATED_TO.toLowerCase() + "'");

 		Dataset<Row> result_context = spark.sql(resultContextQuery);
 		result_context.createOrReplaceTempView("result_context");
@@ -154,6 +179,16 @@
 			.option("compression", "gzip")
 			.mode(SaveMode.Overwrite)
 			.json(outputResultPath);
+
+		result_context = spark.sql(resultContextQueryIsRelatedTo);
+		result_context.createOrReplaceTempView("result_context");
+		spark
+			.sql(RESULT_COMMUNITY_LIST_QUERY)
+			.as(Encoders.bean(ResultCommunityList.class))
+			.write()
+			.option("compression", "gzip")
+			.mode(SaveMode.Append)
+			.json(outputResultPath);
 	}

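The net effect of the two templates above, written as one compact sketch with the join
conditions spelled out (the temp views "result" and "relation" and the shape of
communityIdList, e.g. ('dh-ch','beopen'), are assumptions mirroring this module's setup,
not the exact strings committed here):

	import org.apache.spark.sql.Dataset;
	import org.apache.spark.sql.Row;
	import org.apache.spark.sql.SparkSession;

	class PatentPropagationSketch {

		// communities inherited by patents that are isRelatedTo-linked to a tagged result
		static Dataset<Row> communitiesViaRelatedPatents(SparkSession spark, String communityIdList) {
			// step 1: results already tagged with one of the target communities
			spark
				.sql(
					"select id, collect_set(co.id) community_context "
						+ "from result lateral view explode (context) c as co "
						+ "where datainfo.deletedbyinference = false "
						+ "and lower(co.id) in " + communityIdList + " "
						+ "group by id")
				.createOrReplaceTempView("resultWithContext");

			// step 2: propagate the context along isRelatedTo links towards patents
			return spark
				.sql(
					"select r.target as resultId, rwc.community_context "
						+ "from resultWithContext rwc "
						+ "join relation r on rwc.id = r.source "
						+ "join result p on r.target = p.id "
						+ "where lower(r.relClass) = 'isrelatedto' "
						+ "and array_contains(p.instance.instancetype.classname, 'Patent') "
						+ "and p.datainfo.deletedbyinference = false");
		}
	}

 	public static List<String> getCommunityList(final String baseURL) throws IOException {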
diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep2.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep2.java
index a53d3dfe3..9bebc36e5 100644
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep2.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep2.java
@@ -4,6 +4,7 @@ package eu.dnetlib.dhp.resulttocommunityfromsemrel;
 import static eu.dnetlib.dhp.PropagationConstant.*;
 import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;

+import java.util.ArrayList;
 import java.util.HashSet;
 import java.util.Set;

@@ -76,22 +77,13 @@ public class PrepareResultCommunitySetStep2 {
 					if (b == null) {
 						return a;
 					}
-					Set<String> community_set = new HashSet<>();
-					a.getCommunityList().stream().forEach(aa -> community_set.add(aa));
-					b
-						.getCommunityList()
-						.stream()
-						.forEach(
-							aa -> {
-								if (!community_set.contains(aa)) {
-									a.getCommunityList().add(aa);
-									community_set.add(aa);
-								}
-							});
+					Set<String> community_set = new HashSet<>(a.getCommunityList());
+					community_set.addAll(b.getCommunityList());
+					a.setCommunityList(new ArrayList<>(community_set));
 					return a;
 				})
 				.map(Tuple2::_2)
-				.map(r -> OBJECT_MAPPER.writeValueAsString(r))
+				.map(OBJECT_MAPPER::writeValueAsString)
 				.saveAsTextFile(outputPath, GzipCodec.class);
 	}

From 821700299a31e5201f616fd4235dcd599018d19b Mon Sep 17 00:00:00 2001
From: Miriam Baglioni
Date: Fri, 22 Nov 2024 17:21:58 +0100
Subject: [PATCH 35/73] [patents] added test and resources

---
 .../PrepareResultCommunitySetStep1.java       | 93 +++++++++++--------
 .../ResultToCommunityJobTest.java             | 55 +++++++++++
 .../graph/publication/part-00000.json         | 13 +++
 .../graph/relation/part-00000.json            | 24 +++++
 4 files changed, 147 insertions(+), 38 deletions(-)
 create mode 100644 dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/graph/publication/part-00000.json
 create mode 100644 dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/graph/relation/part-00000.json

diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep1.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep1.java
index ff496bb87..8f23a4cfd 100644
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep1.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep1.java
@@ -37,7 +37,8 @@ public class PrepareResultCommunitySetStep1 {
 	 * relation
 	 */
 	// TODO
-	private static final String RESULT_CONTEXT_QUERY_TEMPLATE = "select target resultId, community_context "
+	private static final String RESULT_CONTEXT_QUERY_TEMPLATE =
+		"select target resultId, community_context "
 		+ "from (select id, collect_set(co.id) community_context "
 		+ "      from result "
 		+ "      lateral view explode (context) c as co "
@@ -59,15 +60,26 @@ public class PrepareResultCommunitySetStep1 {
 		+ "where length(co) > 0 "
 		+ "group by resultId";

-	private static final String RESULT_CONTEXT_QUERY_TEMPLATE_IS_RELATED_TO = "select target resultId, community_context "
-		+ "from (select id, collect_set(co.id) community_context "
-		+ "      from result "
-		+ "      lateral view explode (context) c as co "
-		+ "      where datainfo.deletedbyinference = false %s "
-		+ "      and array_contains(instance.instancetype.classname, 'Patent') group by id) p "
-		+ "    JOIN "
-		+ "    (select source, target from relation "
-		+ "    where datainfo.deletedbyinference = false %s ) r ON p.id = r.source";
+	private static final String RESULT_CONTEXT_QUERY_TEMPLATE_IS_RELATED_TO =
+		"select target as resultId, community_context "
+		+ "from resultWithContext rwc "
+		+ "join relatedToRelations r "
+		+ "join patents p "
+		+ "on rwc.id = r.source and r.target = p.id";
+
+	private static final String RESULT_WITH_CONTEXT = "select id, collect_set(co.id) community_context \n"
+		+ " from result "
+		+ " lateral view explode (context) c as co "
+		+ " where datainfo.deletedbyinference = false AND lower(co.id) IN %s"
+		+ " group by id";
+
+	private static final String RESULT_PATENT = "select id "
+		+ " from result "
+		+ " where array_contains(instance.instancetype.classname, 'Patent')";
+
+	private static final String IS_RELATED_TO_RELATIONS = "select source, target "
+		+ " from relation "
+		+ " where lower(relClass) = 'isrelatedto' and datainfo.deletedbyinference = false";

 	public static void main(String[] args) throws Exception {
 		String jsonConfiguration = IOUtils
@@ -95,20 +107,18 @@ public class PrepareResultCommunitySetStep1 {
 		SparkConf conf = new SparkConf();
 		conf.set("hive.metastore.uris", parser.get("hive_metastore_uris"));

-		final String allowedsemrel = join(",", Arrays.stream(parser.get("allowedsemrels").split(";"))
-			.map(value -> "'" + value.toLowerCase() + "'")
-			.toArray(String[]::new));
-
-		log.info("allowedSemRel: {}", new Gson().toJson(allowedsemrel));
+		final String allowedsemrel = "(" + join(",",
+			Arrays.asList(parser.get("allowedsemrels").split(";")).stream().map(value -> "'" + value.toLowerCase() + "'")
+				.toArray(String[]::new)) + ")";
+		log.info("allowedSemRel: {}", allowedsemrel);

 		final String baseURL = parser.get("baseURL");
 		log.info("baseURL: {}", baseURL);

-		final String communityIdList = join(",", getCommunityList(baseURL).stream()
+		final String communityIdList = "(" + join(",", getCommunityList(baseURL).stream()
 			.map(value -> "'" + value.toLowerCase() + "'")
-			.toArray(String[]::new));
+			.toArray(String[]::new)) + ")";

-		log.info("communityIdList: {}", new Gson().toJson(communityIdList));
 		final String resultType = resultClassName.substring(resultClassName.lastIndexOf(".") + 1).toLowerCase();
 		log.info("resultType: {}", resultType);
@@ -156,32 +166,38 @@
 		final String outputResultPath = outputPath + "/" + resultType;
 		log.info("writing output results to: {}", outputResultPath);

+
 		String resultContextQuery = String
 			.format(
 				RESULT_CONTEXT_QUERY_TEMPLATE,
-				" lower(co.id) IN " + communityIdList,
-				" AND lower(relClass) IN " + allowedsemrel);
+				"AND lower(co.id) IN " + communityIdList,
+				"AND lower(relClass) IN " + allowedsemrel);

-		String resultContextQueryIsRelatedTo = String
-			.format(
-				RESULT_CONTEXT_QUERY_TEMPLATE_IS_RELATED_TO,
-				" AND lower(co.id) IN " + communityIdList,
-				"AND lower(relClass) = '" +
-					ModelConstants.IS_RELATED_TO.toLowerCase() + "'");

 		Dataset<Row> result_context = spark.sql(resultContextQuery);
+		//result_context.createOrReplaceTempView("result_context");
+
+//		spark
+//			.sql(RESULT_COMMUNITY_LIST_QUERY)
+//			.as(Encoders.bean(ResultCommunityList.class))
+//			.write()
+//			.option("compression", "gzip")
+//			.mode(SaveMode.Overwrite)
+//			.json(outputResultPath);
+
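		// Aside (editor sketch, not part of the committed hunk): the three temp views
		// registered below feed RESULT_CONTEXT_QUERY_TEMPLATE_IS_RELATED_TO as
		//   resultWithContext(id, community_context)  -- results already tagged with a target community
		//   patents(id)                               -- results whose instance type is 'Patent'
		//   relatedToRelations(source, target)        -- isRelatedTo links
		// so the unionAll that follows adds one (resultId, community_context) row for every
		// patent that is the target of an isRelatedTo link from an already-tagged result.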
+		Dataset<Row> rwc = spark.sql(String.format(RESULT_WITH_CONTEXT, communityIdList));
+		Dataset<Row> patents = spark.sql(RESULT_PATENT);
+		Dataset<Row> relatedToRelations = spark.sql(IS_RELATED_TO_RELATIONS);
+
+		rwc.createOrReplaceTempView("resultWithContext");
+
+		patents.createOrReplaceTempView("patents");
+
+		relatedToRelations.createOrReplaceTempView("relatedTorelations");
+
+
+		result_context = result_context.unionAll(spark.sql(RESULT_CONTEXT_QUERY_TEMPLATE_IS_RELATED_TO));
+
 		result_context.createOrReplaceTempView("result_context");
 		spark
 			.sql(RESULT_COMMUNITY_LIST_QUERY)
 			.as(Encoders.bean(ResultCommunityList.class))
 			.write()
 			.option("compression", "gzip")
 			.mode(SaveMode.Append)
 			.json(outputResultPath);
+
 	}

 	public static List<String> getCommunityList(final String baseURL) throws IOException {
diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/ResultToCommunityJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/ResultToCommunityJobTest.java
index 0d5b12c80..4361b6f39 100644
--- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/ResultToCommunityJobTest.java
+++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/ResultToCommunityJobTest.java
@@ -6,8 +6,11 @@ import static org.apache.spark.sql.functions.desc;
 import java.io.IOException;
 import java.nio.file.Files;
 import java.nio.file.Path;
+import java.util.ArrayList;
 import java.util.List;
+import java.util.stream.Collectors;

+import eu.dnetlib.dhp.resulttocommunityfromorganization.ResultCommunityList;
 import org.apache.commons.io.FileUtils;
 import org.apache.spark.SparkConf;
 import org.apache.spark.api.java.JavaRDD;
@@ -25,6 +28,7 @@ import org.slf4j.LoggerFactory;
 import com.fasterxml.jackson.databind.ObjectMapper;

 import eu.dnetlib.dhp.schema.oaf.Dataset;
+import scala.collection.Seq;

 public class ResultToCommunityJobTest {

@@ -271,4 +275,55 @@ public class ResultToCommunityJobTest {
 				.get(0)
 				.getString(0));
 	}
+
+	@Test
+	public void prepareStep1Test() throws Exception {
+		/*
+		 final String allowedsemrel = join(",", Arrays.stream(parser.get("allowedsemrels").split(";"))
+		 	.map(value -> "'" + value.toLowerCase() + "'")
+		 	.toArray(String[]::new));
+
+		 log.info("allowedSemRel: {}", new Gson().toJson(allowedsemrel));
+
+		 final String baseURL = parser.get("baseURL");
+		 log.info("baseURL: {}", baseURL);
+		 */
+		PrepareResultCommunitySetStep1
+			.main(
+				new String[] {
+					"-isSparkSessionManaged", Boolean.FALSE.toString(),
+					"-sourcePath", getClass()
+						.getResource("/eu/dnetlib/dhp/resulttocommunityfromsemrel/graph")
+						.getPath(),
+					"-hive_metastore_uris", "",
+					"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Publication",
+					"-outputPath", workingDir.toString() + "/preparedInfo",
+					"-allowedsemrels", "issupplementto;issupplementedby",
+					"-baseURL", "https://dev-openaire.d4science.org/openaire/community/"
+				});
+
+		org.apache.spark.sql.Dataset<ResultCommunityList> resultCommunityList = spark
+			.read()
+			.schema(Encoders.bean(ResultCommunityList.class).schema())
+			.json(workingDir.toString() + "/preparedInfo/publication")
.as(Encoders.bean(ResultCommunityList.class)); + + Assertions.assertEquals(2, resultCommunityList.count()); + Assertions.assertEquals(1,resultCommunityList.filter("resultId = '50|dedup_wf_001::06e51d2bf295531b2d2e7a1b55500783'").count()); + Assertions.assertEquals(1,resultCommunityList.filter("resultId = '50|pending_org_::82f63b2d21ae88596b9d8991780e9888'").count()); + + ArrayList communities = resultCommunityList + .filter("resultId = '50|dedup_wf_001::06e51d2bf295531b2d2e7a1b55500783'") + .first().getCommunityList(); + Assertions.assertEquals(2, communities.size()); + Assertions.assertTrue(communities.stream().anyMatch(cid -> "beopen".equals(cid))); + Assertions.assertTrue(communities.stream().anyMatch(cid -> "dh-ch".equals(cid))); + + communities = resultCommunityList + .filter("resultId = '50|pending_org_::82f63b2d21ae88596b9d8991780e9888'") + .first().getCommunityList(); + Assertions.assertEquals(1, communities.size()); + Assertions.assertEquals("dh-ch", communities.get(0)); + } } diff --git a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/graph/publication/part-00000.json b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/graph/publication/part-00000.json new file mode 100644 index 000000000..7957bcfd5 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/graph/publication/part-00000.json @@ -0,0 +1,13 @@ +{"author":[{"fullname":"Tanouayi, Gnon","name":"Gnon","pid":[],"rank":1,"surname":"Tanouayi"},{"fullname":"GNANDI, Kissao","name":"Kissao","pid":[],"rank":2,"surname":"Gnandi"},{"fullname":"Ouro-Sama, Kamilou","name":"Kamilou","pid":[],"rank":3,"surname":"Ouro-Sama"},{"fullname":"Ahoudi, Housséni","name":"Housséni","pid":[],"rank":4,"surname":"Ahoudi"},{"fullname":"Solitoke, Hodabalo Dhéoulaba","name":"Hodabalo Dhéoulaba","pid":[],"rank":5,"surname":"Solitoke"},{"fullname":"Badassan, Tchaa Esso-Essinam","name":"Tchaa Esso-Essinam","pid":[],"rank":6,"surname":"Badassan"},{"fullname":"Nyametso, A. Yawovi","name":"A. 
Yawovi","pid":[],"rank":7,"surname":"Nyametso"},{"fullname":"Agbéko, Aduayi-Akué Adoté","name":"Aduayi-Akué Adoté","pid":[],"rank":8,"surname":"Agbéko"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|eurocrisdris::fe4903425d9040f680d8610d9079ea14","value":"Episciences"}],"context":[],"contributor":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Importer of dst articles previously hosted by inist Eid system account"}],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2017-05-01"},"dateofcollection":"2022-03-01T00:17:08.896Z","dateoftransformation":"2022-03-01T02:57:50.566Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"This study is a contribution to the development of adsorption techniques for the removal of fluoride in natural waters. The work is carried out on a laboratory scale using local geo-materials sorbents, on the one hand the residues from the treatment of natural phosphorite of Hahotoé-Kpogamé and on the other hand the attapulgite clay mineral from the costal basin of Togo. The work carried out concerns the adsorption of fluoride on those sorbents. The following parameters are batch tested on synthetic fluoride solutions: time, solution pH, geo-material dose and fluoride concentration. The fluoride is analyzed by absorption spectrometry. The adsorption yields on the phosphorite treatment residues for aqueous fluoride solutions at an initial concentration of 10 mg/L and an adsorbent concentration of 10 g/L are 49 % at pH 6.5 and 66 % at pH 4.0. In the same experimental conditions, the yields on clay minerals are 28.2 % and 36.3 %. These yields are logically improved by increasing the adsorbent dosage (from 2 to 30 g/L). Additional tests are carried out on natural water at an initial fluoride concentration of 3.76 mg/L."},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Cette étude est une contribution au développement de techniques d’adsorption pour l’élimination du fluor dans les eaux naturelles. 
Les travaux ont été réalisés à l’échelle du laboratoire en utilisant comme sorbants des géo-matériaux locaux, d’une part les résidus du traitement des phosphates naturels de Hahotoé-Kpogamé et d’autre part l’argilite feuilletée du bassin sédimentaire côtier du Togo. Les travaux réalisés concernent l’adsorption du fluor sur les sorbants considérés. Les paramètres suivants ont été testés en batch sur des solutions synthétiques de fluor : le temps, le pH de la solution, la dose du géo-matériau et la concentration du fluor. Le fluor a été dosé par spectromètrie d’absorption. Les rendements d’adsorption sur les résidus de traitement des phosphates pour des solutions aqueuses de fluor à concentration initiale de 10 mg/L et une concentration en adsorbant de 10 g/L ont été de 49 % à pH 6,5 et 66 % à pH de 4,0. Dans les mêmes conditions expérimentales, les rendements sur les argilites ont été de 28,2 % et 36,3 %. Ces rendements ont logiquement été améliorés en augmentant le dosage en adsorbant (de 2 à 30 g/L). Des essais complémentaires ont été réalisés sur une eau naturelle à une concentration initiale en fluor de 3,76 mg/L."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|06cdd3ff4700::49ec404cee4e1452808aabeaffbd3072","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.4267/dechets-sciences-techniques.3534"}],"collectedfrom":{"key":"10|openaire____::6824b298c96ba906a3e6a70593affbf5","value":"Episciences"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2017-05-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::6824b298c96ba906a3e6a70593affbf5","value":"Episciences"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://eid.episciences.org/7781"]}],"language":{"classid":"fra/fre","classname":"French","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1646506202085,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Foai.episciences.org%2F","datestamp":"2017-05-01","harvestDate":"2022-03-01T00:17:08.896Z","identifier":"oai:episciences.org:eid:7781","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:episciences.org:eid:7781","50|06cdd3ff4700::49ec404cee4e1452808aabeaffbd3072"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{
"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"ISSN: 2778-844X"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Environnement, Ingénierie & Développement"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Episciences.org"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"eid:7781 - Environnement, Ingénierie & Développement, 2017-05-01, N°73 - mai 2017"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"clay"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"geo-materials"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"phosphorite of Hahotoé-Kpogamé"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"removal of 
fluoride"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"argilite"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"élimination du fluor"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"géo-matériaux"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"phosphate de Hahotoé-Kpogamé"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"[SDE.IE]Environmental Sciences/Environmental Engineering"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Défluoruration des eaux à l’aide des résidus du traitement des phosphates naturels et des argilites feuilletées"}]} +{"author":[],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::160a261e5d06fd542c2efcac6e17e08c","value":"RS Global 
Journals"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2016-02-28"},"dateofcollection":"2020-06-01T07:11:47.22Z","dateoftransformation":"2020-07-25T07:25:11.051Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"The results of treatment of 21 patients with multiple injuries, including 12 (57.2%) patients with lesions of limb bones, 9 (42.8%) patients with injuries of the pelvis treated at the Department of Traumatology number 2, 5 for the period of 2013 to 2014 were analyzed by the authors. Developed gentle immobilization of the lower limbs in patients with multiple injuries provides adequate fixation and extension of the lower limb in the intensive care period and during emergency external fixation by the device of external fixation device shin bone or hip.The introduction of surgical treatment of patients in the acute period of polytrauma, using minimally invasive fracture fixation technology allowed providing the early stabilization of the victim’s condition, to avoid diagnostic errors and obtain positive results of treatment in 98% of cases."}],"externalReference":[],"extraInfo":[],"format":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"application/pdf"}],"fulltext":[],"id":"50|07b5c0ccd4fe::e7f5459cc97865f2af6e3da964c1250b","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::160a261e5d06fd542c2efcac6e17e08c","value":"RS Global Journals"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2016-02-28"},"distributionlocation":"","hostedby":{"key":"10|eurocrisdris::fe4903425d9040f680d8610d9079ea14","value":"World 
Science"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"https://creativecommons.org/licenses/by/4.0"},"pid":[],"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://rsglobal.pl/index.php/ws/article/view/895"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"","iss":"","issnLinking":"","issnOnline":"2413-1032","issnPrinted":"2414-6404","name":"World Science","sp":"","vol":""},"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1646506135978,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Frsglobal.pl%2Findex.php%2Findex%2Foai","datestamp":"2020-05-23T18:09:46Z","harvestDate":"2020-06-01T07:11:47.22Z","identifier":"oai:ojs2.rsglobal.pl:article/895","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:ojs2.rsglobal.pl:article/895","50|07b5c0ccd4fe::e7f5459cc97865f2af6e3da964c1250b"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"RS Global Sp. 
z O.O."},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"World Science; Vol 3 No 2(6) (2016): World Science; 43-50"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"World Science; Том 3 № 2(6) (2016): World Science; 43-50"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2414-6404"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2413-1032"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Polytrauma"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"multiple trauma"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"combined injury injury"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"the severity of the 
damage"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"the device of external fixator"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"submersible osteosynthesis"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"«damage control»"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Polytrauma"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"multiple trauma"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"combined injury injury"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"the severity of the 
damage"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"the device of external fixator"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"submersible osteosynthesis"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"«damage control»"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"ЛЕЧЕНИЯ ПАЦИЕНТОВ С СОЧЕТАННЫМИ И МНОЖЕСТВЕННЫМИ ТРАВМАМИ КОНЕЧНОСТЕЙ И КОСТЕЙ ТАЗА"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"ЛЕЧЕНИЯ ПАЦИЕНТОВ С СОЧЕТАННЫМИ И МНОЖЕСТВЕННЫМИ ТРАВМАМИ КОНЕЧНОСТЕЙ И КОСТЕЙ ТАЗА"}]} +{"author":[{"fullname":"Kemppainen, Mika","name":"Mika","pid":[],"rank":1,"surname":"Kemppainen"},{"fullname":"Virkkunen, Iikka","name":"Iikka","pid":[],"rank":2,"surname":"Virkkunen"},{"fullname":"Pitkänen, Jorma","name":"Jorma","pid":[],"rank":3,"surname":"Pitkänen"},{"fullname":"Paussu, Raimo","name":"Raimo","pid":[],"rank":4,"surname":"Paussu"},{"fullname":"Hänninen, Hannu","name":"Hannu","pid":[],"rank":5,"surname":"Hänninen"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information 
System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2003-01-01"},"dateofcollection":"2022-02-28T12:34:39.606Z","dateoftransformation":"2022-02-28T14:02:46.142Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::210c52944502777ba567442480e6a76e","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|eurocrisdris::fe4903425d9040f680d8610d9079ea14","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2003-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/66c69ab9-a33b-4817-97cd-1f38b3b31405"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1646505945226,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2019-09-23T08:45:14Z","harvestDate":"2022-02-28T12:34:39.606Z","identifier":"oai:cris.vtt.fi:publications/66c69ab9-a33b-4817-97cd-1f38b3b31405","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["50|355e65625b88::210c52944502777ba567442480e6a76e","oai:cris.vtt.fi:publications/66c69ab9-a33b-4817-97cd-1f38b3b31405"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Kemppainen , M , Virkkunen , I , Pitkänen , J , Paussu , R & Hänninen , H 2003 , ' Comparison of realistic artificial cracks and in-service cracks ' , The e-Journal of Nondestructive Testing & Ultrasonics , vol. 8 , no. 3 , 6 . 
< http://www.ndt.net/article/ecndt02/401/401.htm >"}],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Comparison of realistic artificial cracks and in-service cracks"}]} +{"author":[{"fullname":"Kelhä, Väinö","name":"Väinö","pid":[],"rank":1,"surname":"Kelhä"},{"fullname":"Manninen, M.","name":"M.","pid":[],"rank":2,"surname":"Manninen"},{"fullname":"Oittinen, P.","name":"P.","pid":[],"rank":3,"surname":"Oittinen"},{"fullname":"Tiesmäki, Jarkko","name":"Jarkko","pid":[],"rank":4,"surname":"Tiesmäki"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1974-01-01"},"dateofcollection":"2022-02-28T12:32:28.547Z","dateoftransformation":"2022-02-28T14:46:21.4Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::3754cff043a1700077031ea29f8cc240","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1974-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information 
System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/d2b5302f-004a-407c-8f9c-4312e0dbf679"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1646505968213,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2021-08-31T11:55:56Z","harvestDate":"2022-02-28T12:32:28.547Z","identifier":"oai:cris.vtt.fi:publications/d2b5302f-004a-407c-8f9c-4312e0dbf679","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["50|355e65625b88::3754cff043a1700077031ea29f8cc240","oai:cris.vtt.fi:publications/d2b5302f-004a-407c-8f9c-4312e0dbf679"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Kelhä , V , Manninen , M , Oittinen , P & Tiesmäki , J 1974 , ' A parallel plate tackmeter for measuring the splitting resistance of printing inks ' , Surface Coatings International: JOCCA , vol. 57 , pp. 184-188 ."}],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"A parallel plate tackmeter for measuring the splitting resistance of printing inks"}]} +{"author":[{"fullname":"Mononen, Petri","name":"Petri","pid":[],"rank":1,"surname":"Mononen"},{"fullname":"Innamaa, Satu","name":"Satu","pid":[],"rank":2,"surname":"Innamaa"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information 
System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2013-01-01"},"dateofcollection":"2022-02-28T12:38:14.128Z","dateoftransformation":"2022-02-28T14:48:17.052Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::3875365f5052758953b072682e62bc80","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2013-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/8602dae4-00e8-4f45-828b-65a367eb4730"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1646505969378,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2021-09-17T12:01:51Z","harvestDate":"2022-02-28T12:38:14.128Z","identifier":"oai:cris.vtt.fi:publications/8602dae4-00e8-4f45-828b-65a367eb4730","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/8602dae4-00e8-4f45-828b-65a367eb4730","50|355e65625b88::3875365f5052758953b072682e62bc80"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Mononen , P & Innamaa , S 2013 , ' Enhancing journey quality : Field Operational Test of Aftermarket and Nomadic Devices in Vehicles ' , Baltic Transport Journal , pp. 
46-47 ."}],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Enhancing journey quality:Field Operational Test of Aftermarket and Nomadic Devices in Vehicles"}]} +{"author":[{"fullname":"Tsupari, Eemeli","name":"Eemeli","pid":[],"rank":1,"surname":"Tsupari"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|eurocrisdris::9ae43d14471c4b33661fedda6f06b539","value":"VTT Research Information System"},{"key":"10|opendoar____::f0dd4a99fba6075a9494772b58f95280","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2018-10-01"},"dateofcollection":"2022-02-28T12:38:39.075Z","dateoftransformation":"2022-02-28T15:37:43.154Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Despite international agreements, global greenhouse gas (GHG) emissions have not decreased according to the targets. Consequently, our generation is creating an enormous problem for future generations. As climate change is a global problem, GHG emissions must decrease globally. Consequently, international policies are needed, actions should be effective and the impacts should be assessed with broad boundaries. In Europe, the cornerstone of climate policy is the EU Emissions Trading Scheme (EU ETS) but the rebound impacts within the EU ETS are often excluded in the assessments. This dissertation examines the impacts of major CO2 emission reduction solutions with different system boundaries, highlighting the importance of boundary selection on the results. In addition, the economic feasibilities of the selected solutions are evaluated.The case examples represent the most important sectors in terms of global CO2 emissions, such as electricity and heat production, the steel industry and transport. The studied technologies include efficient Waste-to-Energy (WtE) concepts with high power-to-heat ratio, utilisation of CO2 Capture and Storage (CCS) in different applications, replacing steel mill blast furnaces with Oxygen Blast Furnaces (OBF), Combined Heat and Power (CHP) and Carbon Capture and Utilisation (CCU) for storable fuels, which can be used for example in transportation. 
The results highlight the importance of the consequences in the electricity production system as well as the rebound impacts in the EU ETS. For example, the studied concepts to decrease direct GHG emissions of steel mills lead to increased power purchase from markets and consequently increase in emissions of the power system. The impacts of CCU concepts based on electrolysis increase the emissions in electricity production but enable a decrease in the usage of fossil fuels in transportation. In addition, converting electricity to storable fuels enable higher shares of variable solar and wind energy in the power systems. The consequences in the power systems are complex, including for example the impacts on electricity imports and exports, future investments and the EU ETS. Even if these impacts can be recognised by qualitative means, unambiguous quantitative consequences cannot be given. Understanding the decisive impacts of the framework and boundaries is crucial to interpreting different assessments and making effective actions and policy decisions. Solutions which decrease emissions within a narrow system boundary can actually increase the emissions of the broader system."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::54a1c76f520bb2c8da27d12e42891088","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2018-10-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0006","classname":"Doctoral thesis","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/d62ac5ef-7347-400f-95b2-59d970ceb505"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1646505683219,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2021-05-18T10:33:00Z","harvestDate":"2022-02-28T12:38:39.075Z","identifier":"oai:cris.vtt.fi:publications/d62ac5ef-7347-400f-95b2-59d970ceb505","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/d62ac5ef-7347-400f-95b2-59d970ceb505","50|355e65625b88::54a1c76f520bb2c8da27d12e42891088"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Aalto 
University"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Tsupari , E 2018 , ' Impact of system boundaries on the effectiveness of climate change mitigation actions : Dissertation ' , Doctor Degree , Aalto University . < http://urn.fi/URN:ISBN:978-952-60-8358-2 >"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"energy"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"environmental science"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"climate change mitigation"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"greenhouse gases"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"carbon dioxide"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"emissions 
trading"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"economic feasibility"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"/dk/atira/pure/sustainabledevelopmentgoals/climate_action"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"SDG 13 - Climate Action"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Impact of system boundaries on the effectiveness of climate change mitigation actions:Dissertation"}]} +{"author":[{"fullname":"Turkia, Heidi","name":"Heidi","pid":[],"rank":1,"surname":"Turkia"},{"fullname":"Sirén, Heli","name":"Heli","pid":[],"rank":2,"surname":"Sirén"},{"fullname":"Penttilä, Merja","name":"Merja","pid":[],"rank":3,"surname":"Penttilä"},{"fullname":"Pitkänen, Juha Pekka","name":"Juha Pekka","pid":[],"rank":4,"surname":"Pitkänen"}],"bestaccessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2013-02-22"},"dateofcollection":"2022-02-28T12:29:51.291Z","dateoftransformation":"2022-02-28T16:19:35.201Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"
Hydrolysates of lignocellulosic biomass, used as substrates for the sustainable production of fuels and chemicals, often contain high amounts of phenolic compounds inhibiting the production microbiota. Quantification of these inhibitor compounds may help to understand possible difficulties in bioprocessing and further the development of more efficient, robust and tolerable processes. A separation method based on capillary electrophoresis with UV detection was developed for the simultaneous quantification of 10 phenolic compounds that may have inhibitor properties. Intraday relative standard deviations were less than 0.7% for migration times and between 2.6% and 6.4% for peak areas. Interday relative standard deviations were less than 3.0% for migration times and between 5.0% and 7.2% for peak areas. The method was applied to demonstrate that Saccharomyces cerevisiae was able to decrease the concentrations of vanillin, coniferyl aldehyde, syringaldehyde, acetoguaiacone and cinnamic acid during the cultivation, whereas the concentrations of phenols increased.
"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::6c232359e3b3165574cb88f0554d9264","instance":[{"accessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1016/j.chroma.2013.01.004"}],"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2013-02-22"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/91f411d0-d8f8-4bf1-9072-345303cc776c"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1646505708387,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2022-01-29T08:16:51Z","harvestDate":"2022-02-28T12:29:51.291Z","identifier":"oai:cris.vtt.fi:publications/91f411d0-d8f8-4bf1-9072-345303cc776c","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/91f411d0-d8f8-4bf1-9072-345303cc776c","50|355e65625b88::6c232359e3b3165574cb88f0554d9264"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Turkia , H , Sirén , H , Penttilä , M & Pitkänen , J P 2013 , ' Capillary electrophoresis for the monitoring of phenolic compounds in bioprocesses ' , Journal of Chromatography A , vol. 1278 , pp. 175-180 . 
https://doi.org/10.1016/j.chroma.2013.01.004"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Bioprocess monitoring"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Capillary electrophoresis"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Phenolic compounds"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Capillary electrophoresis for the monitoring of phenolic compounds in bioprocesses"}]} +{"author":[{"fullname":"Veijalainen, Jari","name":"Jari","pid":[],"rank":1,"surname":"Veijalainen"},{"fullname":"Wolski, Antoni","name":"Antoni","pid":[],"rank":2,"surname":"Wolski"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[{"dataInfo": null,"id": "dariah"}],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1991-01-01"},"dateofcollection":"2022-02-28T12:33:57.005Z","dateoftransformation":"2022-02-28T16:33:35.101Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::74009c567c81b4aa55c813db658734df","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information 
System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1991-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0002","classname":"Book","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/bb17c77a-f574-4921-a5cb-32dc1f283fa3"]},{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1991-01-01"},"distributionlocation":"","hostedby":{"key":"10|eurocrisdris::fe4903425d9040f680d8610d9079ea14","value":"VTT Research Information System"},"instancetype":{"classid":"0002","classname":"Book","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/bb17c77a-f574-4921-a5cb-32dc1f283fa3"]}, {"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1991-01-01"},"distributionlocation":"","hostedby":{"key":"10|opendoar____::f0dd4a99fba6075a9494772b58f95280","value":"VTT Research Information 
System"},"instancetype":{"classid":"0002","classname":"Book","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/bb17c77a-f574-4921-a5cb-32dc1f283fa3"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1646505716994,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2020-12-21T07:05:54Z","harvestDate":"2022-02-28T12:33:57.005Z","identifier":"oai:cris.vtt.fi:publications/bb17c77a-f574-4921-a5cb-32dc1f283fa3","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["50|355e65625b88::74009c567c81b4aa55c813db658734df","oai:cris.vtt.fi:publications/bb17c77a-f574-4921-a5cb-32dc1f283fa3"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"VTT Technical Research Centre of Finland"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Veijalainen , J & Wolski , A 1991 , Prepare and commit certification for decentralized transaction management in rigorous multidatabases : Research Report No. J-1 . VTT Technical Research Centre of Finland ."}],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Prepare and commit certification for decentralized transaction management in rigorous multidatabases:Research Report No. 
J-1"}]} +{"author":[{"fullname":"Hanhijärvi, Antti","name":"Antti","pid":[],"rank":1,"surname":"Hanhijärvi"},{"fullname":"Hukka, A.","name":"A.","pid":[],"rank":2,"surname":"Hukka"},{"fullname":"Paajanen, T.","name":"T.","pid":[],"rank":3,"surname":"Paajanen"},{"fullname":"Pulkkinen, P.","name":"P.","pid":[],"rank":4,"surname":"Pulkkinen"},{"fullname":"Sundman, S.","name":"S.","pid":[],"rank":5,"surname":"Sundman"}],"bestaccessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2003-01-01"},"dateofcollection":"2022-02-28T12:32:33.974Z","dateoftransformation":"2022-02-28T17:38:24.191Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"The paper presents experimental results of jet drying tests of birch (Betula pendula) and spruce (Picea abies) veneers at temperatures of 140°C and 190°C. Results include drying rates for 1.5 mm thick birch veneers as well as 1.5 mm and 2.6 mm thick spruce veneers of both heartwood and sapwood. Based on the test results, material parameter values for a simplified drying model are assessed. The model is based on the use of an effective diffusion coefficient and an effective surface emission coefficient, which values are calibrated to fit to the experimental results. 
It is observed, that separate model parameter sets are needed for the two different species but also for occurrence of heartwood or sapwood (spruce) and different thickness values of veneers."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|dedup_wf_001::08d6f2001319c86d0e69b0f83ad75df2","instance":[{"accessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1007/s00107-003-0379-4"}],"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2003-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/8cc91100-904f-43c5-bb3d-1cc3e0f4a4b5"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1646505760180,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2022-01-31T21:18:51Z","harvestDate":"2022-02-28T12:32:33.974Z","identifier":"oai:cris.vtt.fi:publications/8cc91100-904f-43c5-bb3d-1cc3e0f4a4b5","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["50|355e65625b88::9cb10895b4a92b0215b85acb2c3268b9","oai:cris.vtt.fi:publications/8cc91100-904f-43c5-bb3d-1cc3e0f4a4b5"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Hanhijärvi , A , Hukka , A , Paajanen , T , Pulkkinen , P & Sundman , S 2003 , ' Experimental investigation of jet drying of birch and spruce veneers and modelling with a simplified approach ' , Holz als Roh- und Werkstoff , vol. 61 , no. 2 , pp. 83-88 . 
https://doi.org/10.1007/s00107-003-0379-4"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"jet drying"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"drying"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"veneers"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"birch"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"spruce"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"heartwood"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"sapwood"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Experimental investigation of jet drying of birch and spruce veneers and modelling with a simplified approach"}]} 
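[Editor's note, not part of the patch: the surrounding lines are raw JSONL test fixtures, and the diff itself never shows how they are consumed. As a minimal, hedged sketch only, the snippet below shows how one such line could be inspected with Jayway JsonPath, the same JSON-path library the comparators use via DocumentContext. The class name, fixture path, and JSON paths are assumptions for illustration, not the project's actual test code.]

    import com.jayway.jsonpath.JsonPath;

    import java.io.IOException;
    import java.nio.file.Files;
    import java.nio.file.Paths;
    import java.util.List;

    public class FixtureInspector {

        public static void main(String[] args) throws IOException {
            // hypothetical location; the real fixture path in the repo may differ
            List<String> lines = Files.readAllLines(Paths.get("publications.jsonl"));
            for (String json : lines) {
                if (json.isEmpty())
                    continue; // skip blank lines between records
                // pull the main title and the deletedbyinference flag that the dedup logic reads
                String title = JsonPath.read(json, "$.title[0].value");
                Boolean deleted = JsonPath.read(json, "$.dataInfo.deletedbyinference");
                System.out.println(deleted + "\t" + title);
            }
        }
    }

[Every record in this fixture carries a title and a dataInfo block, so these paths resolve for the lines shown here; records missing either field would need a PathNotFoundException guard.]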
+{"author":[{"fullname":"Vainonen-Ahlgren, Elizaveta","name":"Elizaveta","pid":[],"rank":1,"surname":"Vainonen-Ahlgren"},{"fullname":"Likonen, Jari","name":"Jari","pid":[],"rank":2,"surname":"Likonen"},{"fullname":"Renvall,","pid":[],"rank":3},{"fullname":"Rohde, V.","name":"V.","pid":[],"rank":4,"surname":"Rohde"},{"fullname":"Mayer, M.","name":"M.","pid":[],"rank":5,"surname":"Mayer"}],"bestaccessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2007-01-01"},"dateofcollection":"2022-02-28T12:32:50.667Z","dateoftransformation":"2022-02-28T17:49:49.964Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"To investigate material transport in scrape-off layer plasma and long term deposition in divertor, 13CH4 was puffed at the end of 2004 and 2005 experimental campaigns into ASDEX Upgrade from the outer mid-plane.
Ex situ analyses of the tiles were performed by secondary ion mass spectrometry.
The peaks of 13C were detected below the bottom inner strike point and at the horizontal tile at the outer lower divertor. Approximately 21% of the total puffed 13C amount was detected.
The deposition rate for carbon by plasma was also calculated in long term experiment. It was obtained to be 22 × 10−3 and 8.7 × 10−3 g/s for the upper (campaign 2004) and lower (campaign 2003) divertors, respectively."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|dedup_wf_001::06e51d2bf295531b2d2e7a1b55500783","instance":[{"accessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1016/j.jnucmat.2007.01.026"}],"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2007-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0001","classname":"Patent","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/2472b21e-1fdc-4121-946e-e9c8fae6d02d"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1646505766149,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2022-02-01T02:35:05Z","harvestDate":"2022-02-28T12:32:50.667Z","identifier":"oai:cris.vtt.fi:publications/2472b21e-1fdc-4121-946e-e9c8fae6d02d","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["50|355e65625b88::a29614444f5030f11e75c6c27264d272","oai:cris.vtt.fi:publications/2472b21e-1fdc-4121-946e-e9c8fae6d02d"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Vainonen-Ahlgren , E , Likonen , J , Renvall , Rohde , V & Mayer , M 2007 , ' Migration of 13C and deposition at ASDEX Upgrade ' , Journal of Nuclear Materials , vol. 363-365 , pp. 270-275 . 
https://doi.org/10.1016/j.jnucmat.2007.01.026"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"ASDEX upgrade"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"divertor"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"carbon based materials"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"erosion"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"deposition"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"ITER"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"JET"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"plasma"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction"
:{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"fusion energy"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Migration of 13C and deposition at ASDEX Upgrade"}]} +{"author":[{"fullname":"Aalto, Timo","name":"Timo","pid":[],"rank":1,"surname":"Aalto"},{"fullname":"Harjanne, Mikko","name":"Mikko","pid":[],"rank":2,"surname":"Harjanne"},{"fullname":"Kapulainen, Markku","name":"Markku","pid":[],"rank":3,"surname":"Kapulainen"}],"bestaccessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[{"dataInfo": null,"id": "beopen"}],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2003-01-01"},"dateofcollection":"2022-02-28T12:32:37.581Z","dateoftransformation":"2022-02-28T19:39:18.717Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"A novel method has been developed for measuring the rotational angle of a fiber's or a waveguide's polarization axis with respect to a reference angle. The reference angle is the polarization axis of the measuring device. The method also gives the true polarization extinction ratio of the measured fiber or waveguide. The method is suitable for the characterization and rotational alignment of polarization-maintaining waveguides and fibers. In particular, the method can be used to rotationally align the fiber-waveguide interconnections during waveguide characterization. The measuring device is either a linear polarizer or a polarization splitter that is accurately rotated with respect to the device under test. 
According to the experiments with a polarization-maintaining fiber, the method is very easy and inexpensive to implement, and the angular accuracy can be better than 0.2 deg."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|openorgs____::64badd35233ba2cd4946368ef2f4cf57","instance":[{"accessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1117/1.1600730"}],"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2003-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/8cd538fb-6484-4655-81dd-47348d358fd4"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1646505829230,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2022-01-31T21:47:37Z","harvestDate":"2022-02-28T12:32:37.581Z","identifier":"oai:cris.vtt.fi:publications/8cd538fb-6484-4655-81dd-47348d358fd4","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["50|355e65625b88::df0143af011fd82af8ac2d07b03ee8cd","oai:cris.vtt.fi:publications/8cd538fb-6484-4655-81dd-47348d358fd4"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Aalto , T , Harjanne , M & Kapulainen , M 2003 , ' Method for the rotational alignment of polarization-maintaining optical fibers and waveguides ' , Optical Engineering , vol. 42 , no. 10 , pp. 2861-2867 . 
https://doi.org/10.1117/1.1600730"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"optical waveguide"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"polarization-maintaining fiber"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"characterization"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"fiber-waveguide coupling"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"polarization"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"polarization axis"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"polarizer"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"polarization 
splitter"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Method for the rotational alignment of polarization-maintaining optical fibers and waveguides"}]} +{"author":[{"fullname":"Aalto, Timo","name":"Timo","pid":[],"rank":1,"surname":"Aalto"},{"fullname":"Harjanne, Mikko","name":"Mikko","pid":[],"rank":2,"surname":"Harjanne"},{"fullname":"Kapulainen, Markku","name":"Markku","pid":[],"rank":3,"surname":"Kapulainen"}],"bestaccessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[{"dataInfo": null,"id": "beopen"}],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2003-01-01"},"dateofcollection":"2022-02-28T12:32:37.581Z","dateoftransformation":"2022-02-28T19:39:18.717Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"A novel method has been developed for measuring the rotational angle of a fiber's or a waveguide's polarization axis with respect to a reference angle. The reference angle is the polarization axis of the measuring device. The method also gives the true polarization extinction ratio of the measured fiber or waveguide. The method is suitable for the characterization and rotational alignment of polarization-maintaining waveguides and fibers. In particular, the method can be used to rotationally align the fiber-waveguide interconnections during waveguide characterization. The measuring device is either a linear polarizer or a polarization splitter that is accurately rotated with respect to the device under test. 
According to the experiments with a polarization-maintaining fiber, the method is very easy and inexpensive to implement, and the angular accuracy can be better than 0.2 deg."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|openorgs____::64badd35233ba2cd4946368ef2f4cf57","instance":[{"accessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1117/1.1600730"}],"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2003-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/8cd538fb-6484-4655-81dd-47348d358fd4"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1646505829230,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2022-01-31T21:47:37Z","harvestDate":"2022-02-28T12:32:37.581Z","identifier":"oai:cris.vtt.fi:publications/8cd538fb-6484-4655-81dd-47348d358fd4","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["50|355e65625b88::df0143af011fd82af8ac2d07b03ee8cd","oai:cris.vtt.fi:publications/8cd538fb-6484-4655-81dd-47348d358fd4"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Aalto , T , Harjanne , M & Kapulainen , M 2003 , ' Method for the rotational alignment of polarization-maintaining optical fibers and waveguides ' , Optical Engineering , vol. 42 , no. 10 , pp. 2861-2867 . 
https://doi.org/10.1117/1.1600730"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"optical waveguide"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"polarization-maintaining fiber"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"characterization"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"fiber-waveguide coupling"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"polarization"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"polarization axis"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"polarizer"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"polarization 
splitter"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Method for the rotational alignment of polarization-maintaining optical fibers and waveguides"}]} +{"author":[{"fullname":"Penttilä, Raimo","name":"Raimo","pid":[],"rank":1,"surname":"Penttilä"},{"fullname":"Vanttaja, Ilkka","name":"Ilkka","pid":[],"rank":2,"surname":"Vanttaja"},{"fullname":"Haapamäki, Petteri","name":"Petteri","pid":[],"rank":3,"surname":"Haapamäki"},{"fullname":"Kujanpää, Veli","name":"Veli","pid":[],"rank":4,"surname":"Kujanpää"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|opendoar____::f0dd4a99fba6075a9494772b58f95280","value":"VTT Research Information System"}],"context": [{"dataInfo":null, "id":"dh-ch"}],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1994-01-01"},"dateofcollection":"2022-02-28T12:35:26.769Z","dateoftransformation":"2022-02-28T19:54:10.494Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::e7d48a470b13bda61f7ebe3513e20cb6","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1994-01-01"},"distributionlocation":"","hostedby":{"key":"10|eurocrisdris::9ae43d14471c4b33661fedda6f06b539","value":"VTT Research Information 
System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/ddbd28ea-5fbf-43e1-896f-e69856870c26"]}],"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1646505838552,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2019-08-08T07:09:42Z","harvestDate":"2022-02-28T12:35:26.769Z","identifier":"oai:cris.vtt.fi:publications/ddbd28ea-5fbf-43e1-896f-e69856870c26","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/ddbd28ea-5fbf-43e1-896f-e69856870c26","50|355e65625b88::e7d48a470b13bda61f7ebe3513e20cb6"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Penttilä , R , Vanttaja , I , Haapamäki , P & Kujanpää , V 1994 , ' Liimauksen ja puristusliittämisen yhdistämisellä lisää lujuutta, jäykkyyttä ja tiiveyttä ' , Ohutlevyuutiset , no. 2 , pp. 17-19 ."}],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Liimauksen ja puristusliittämisen yhdistämisellä lisää lujuutta, jäykkyyttä ja tiiveyttä"}]} \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/graph/relation/part-00000.json b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/graph/relation/part-00000.json new file mode 100644 index 000000000..1733e2549 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/graph/relation/part-00000.json @@ -0,0 +1,24 @@ +{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"propagation","inferred":true,"invisible":false,"provenanceaction":{"classid":"result:organization:instrepo","classname":"Propagation of affiliation to result collected from datasources of type institutional repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.85"},"properties":[],"relClass":"issupplementedby","relType":"resultOrganization","source":"50|355e65625b88::e7d48a470b13bda61f7ebe3513e20cb6","subRelType":"affiliation","target":"50|pending_org_::82f63b2d21ae88596b9d8991780e9888","validated":false} 
+{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"propagation","inferred":true,"invisible":false,"provenanceaction":{"classid":"result:organization:instrepo","classname":"Propagation of affiliation to result collected from datasources of type institutional repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.85"},"properties":[],"relClass":"issupplementedby","relType":"resultOrganization","source":"50|355e65625b88::e7d48a470b13bda61f7ebe3513e20cb6","subRelType":"affiliation","target":"50|dedup_wf_001::06e51d2bf295531b2d2e7a1b55500783","validated":false} +{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"propagation","inferred":true,"invisible":false,"provenanceaction":{"classid":"result:organization:instrepo","classname":"Propagation of affiliation to result collected from datasources of type institutional repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.85"},"properties":[],"relClass":"IsProvidedBy","relType":"resultOrganization","source":"10|opendoar____::f0dd4a99fba6075a9494772b58f95280","subRelType":"affiliation","target":"20|openorgs____::322ff2a6524820640bc5d1311871585e","validated":false} +{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"propagation","inferred":true,"invisible":false,"provenanceaction":{"classid":"result:organization:instrepo","classname":"Propagation of affiliation to result collected from datasources of type institutional repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.85"},"properties":[],"relClass":"IsProvidedBy","relType":"resultOrganization","source":"10|eurocrisdris::9ae43d14471c4b33661fedda6f06b539","subRelType":"affiliation","target":"20|openorgs____::58e60f1715d219aa6757ba0b0f2ccbce","validated":false} +{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"propagation","inferred":true,"invisible":false,"provenanceaction":{"classid":"result:organization:instrepo","classname":"Propagation of affiliation to result collected from datasources of type institutional repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.85"},"properties":[],"relClass":"IsProvidedBy","relType":"resultOrganization","target":"20|openorgs____::64badd35233ba2cd4946368ef2f4cf57","subRelType":"affiliation","source":"10|issn___print::a7a2010e75d849442790955162ef4e42","validated":false} +{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"propagation","inferred":true,"invisible":false,"provenanceaction":{"classid":"result:organization:instrepo","classname":"Propagation of affiliation to result collected from datasources of type institutional repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.85"},"properties":[],"relClass":"IsProvidedBy","relType":"resultOrganization","source":"10|issn___print::a7a2010e75d849442790955162ef4e43","subRelType":"affiliation","target":"20|openorgs____::64badd35233ba2cd4946368ef2f4cf57","validated":false} +{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"propagation","inferred":true,"invisible":false,"provenanceaction":{"classid":"result:organization:instrepo","classname":"Propagation of affiliation to result collected from datasources of type institutional 
repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.85"},"properties":[],"relClass":"IsProvidedBy","relType":"resultOrganization","source":"10|issn___print::a7a2010e75d849442790955162ef4e44","subRelType":"affiliation","target":"20|openorgs____::548cbb0c5a93722f3a9aa62aa17a1ba1","validated":false} +{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"propagation","inferred":true,"invisible":false,"provenanceaction":{"classid":"result:organization:instrepo","classname":"Propagation of affiliation to result collected from datasources of type institutional repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.85"},"properties":[],"relClass":"IsProvidedBy","relType":"resultOrganization","source":"10|issn___print::a7a2010e75d849442790955162ef4e45","subRelType":"affiliation","target":"20|pending_org_::c522a7c935f9fd9578122e60eeec282c","validated":false} +{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"propagation","inferred":true,"invisible":false,"provenanceaction":{"classid":"result:organization:instrepo","classname":"Propagation of affiliation to result collected from datasources of type institutional repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.85"},"properties":[],"relClass":"isrelatedto","relType":"resultOrganization","source":"50|openorgs____::64badd35233ba2cd4946368ef2f4cf57","subRelType":"affiliation","target":"50|dedup_wf_001::06e51d2bf295531b2d2e7a1b55500783","validated":false} +{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"propagation","inferred":true,"invisible":false,"provenanceaction":{"classid":"result:organization:instrepo","classname":"Propagation of affiliation to result collected from datasources of type institutional repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.85"},"properties":[],"relClass":"hasAuthorInstitution","relType":"resultOrganization","source":"50|dedup_wf_001::06e51d2bf295531b2d2e7a1b55500783","subRelType":"affiliation","target":"20|openorgs____::64badd35233ba2cd4946368ef2f4cf57","validated":false} +{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"propagation","inferred":true,"invisible":false,"provenanceaction":{"classid":"result:organization:instrepo","classname":"Propagation of affiliation to result collected from datasources of type institutional repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.85"},"properties":[],"relClass":"isrelatedto","relType":"resultOrganization","source":"50|355e65625b88::74009c567c81b4aa55c813db658734df","subRelType":"affiliation","target":"50|dedup_wf_001::08d6f2001319c86d0e69b0f83ad75df2","validated":false} +{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"propagation","inferred":true,"invisible":false,"provenanceaction":{"classid":"result:organization:instrepo","classname":"Propagation of affiliation to result collected from datasources of type institutional repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.85"},"properties":[],"relClass":"hasAuthorInstitution","relType":"resultOrganization","source":"50|dedup_wf_001::08d6f2001319c86d0e69b0f83ad75df2","subRelType":"affiliation","target":"20|openorgs____::91a81877815afb4ebf25c1a3f3b03c5d","validated":false} 
+{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"propagation","inferred":true,"invisible":false,"provenanceaction":{"classid":"result:organization:instrepo","classname":"Propagation of affiliation to result collected from datasources of type institutional repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.85"},"properties":[],"relClass":"isAuthorInstitutionOf","relType":"resultOrganization","source":"20|openorgs____::548cbb0c5a93722f3a9aa62aa17a1ba1","subRelType":"affiliation","target":"50|dedup_wf_001::0a1cdf269375d32ce341fdeb0e92dfa8","validated":false} +{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"propagation","inferred":true,"invisible":false,"provenanceaction":{"classid":"result:organization:instrepo","classname":"Propagation of affiliation to result collected from datasources of type institutional repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.85"},"properties":[],"relClass":"hasAuthorInstitution","relType":"resultOrganization","source":"50|dedup_wf_001::0a1cdf269375d32ce341fdeb0e92dfa8","subRelType":"affiliation","target":"20|openorgs____::548cbb0c5a93722f3a9aa62aa17a1ba1","validated":false} +{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"propagation","inferred":true,"invisible":false,"provenanceaction":{"classid":"result:organization:instrepo","classname":"Propagation of affiliation to result collected from datasources of type institutional repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.85"},"properties":[],"relClass":"isAuthorInstitutionOf","relType":"resultOrganization","source":"20|pending_org_::a50fdd7f7e77b74ea2b16823151c391a","subRelType":"affiliation","target":"50|dedup_wf_001::0ab92bed024ee6883c7a1244722e5eec","validated":false} +{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"propagation","inferred":true,"invisible":false,"provenanceaction":{"classid":"result:organization:instrepo","classname":"Propagation of affiliation to result collected from datasources of type institutional repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.85"},"properties":[],"relClass":"hasAuthorInstitution","relType":"resultOrganization","source":"50|dedup_wf_001::0ab92bed024ee6883c7a1244722e5eec","subRelType":"affiliation","target":"20|pending_org_::a50fdd7f7e77b74ea2b16823151c391a","validated":false} +{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"propagation","inferred":true,"invisible":false,"provenanceaction":{"classid":"result:organization:instrepo","classname":"Propagation of affiliation to result collected from datasources of type institutional repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.85"},"properties":[],"relClass":"isAuthorInstitutionOf","relType":"resultOrganization","source":"20|openorgs____::64badd35233ba2cd4946368ef2f4cf57","subRelType":"affiliation","target":"50|dedup_wf_001::0ca26c736ad4d15b3d5ee90a4d7853e1","validated":false} +{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"propagation","inferred":true,"invisible":false,"provenanceaction":{"classid":"result:organization:instrepo","classname":"Propagation of affiliation to result collected from datasources of type institutional 
repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.85"},"properties":[],"relClass":"hasAuthorInstitution","relType":"resultOrganization","source":"50|dedup_wf_001::0ca26c736ad4d15b3d5ee90a4d7853e1","subRelType":"affiliation","target":"20|openorgs____::64badd35233ba2cd4946368ef2f4cf57","validated":false} +{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"propagation","inferred":true,"invisible":false,"provenanceaction":{"classid":"result:organization:instrepo","classname":"Propagation of affiliation to result collected from datasources of type institutional repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.85"},"properties":[],"relClass":"isAuthorInstitutionOf","relType":"resultOrganization","source":"20|pending_org_::a50fdd7f7e77b74ea2b16823151c391a","subRelType":"affiliation","target":"50|dedup_wf_001::0ef8dfab3927cb4d69df0d3113f05a42","validated":false} +{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"propagation","inferred":true,"invisible":false,"provenanceaction":{"classid":"result:organization:instrepo","classname":"Propagation of affiliation to result collected from datasources of type institutional repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.85"},"properties":[],"relClass":"hasAuthorInstitution","relType":"resultOrganization","source":"50|dedup_wf_001::0ef8dfab3927cb4d69df0d3113f05a42","subRelType":"affiliation","target":"20|pending_org_::a50fdd7f7e77b74ea2b16823151c391a","validated":false} +{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"propagation","inferred":true,"invisible":false,"provenanceaction":{"classid":"result:organization:instrepo","classname":"Propagation of affiliation to result collected from datasources of type institutional repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.85"},"properties":[],"relClass":"isAuthorInstitutionOf","relType":"resultOrganization","source":"20|openorgs____::548cbb0c5a93722f3a9aa62aa17a1ba1","subRelType":"affiliation","target":"50|dedup_wf_001::0f488ad00253126c14a21abe6b2d406c","validated":false} +{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"propagation","inferred":true,"invisible":false,"provenanceaction":{"classid":"result:organization:instrepo","classname":"Propagation of affiliation to result collected from datasources of type institutional repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.85"},"properties":[],"relClass":"hasAuthorInstitution","relType":"resultOrganization","source":"50|dedup_wf_001::0f488ad00253126c14a21abe6b2d406c","subRelType":"affiliation","target":"20|openorgs____::548cbb0c5a93722f3a9aa62aa17a1ba1","validated":false} +{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"propagation","inferred":true,"invisible":false,"provenanceaction":{"classid":"result:organization:instrepo","classname":"Propagation of affiliation to result collected from datasources of type institutional repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.85"},"properties":[],"relClass":"isAuthorInstitutionOf","relType":"resultOrganization","source":"20|pending_org_::c522a7c935f9fd9578122e60eeec282c","subRelType":"affiliation","target":"50|dedup_wf_001::12206bf78aabd7d52132477182d19147","validated":false} 
+{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"propagation","inferred":true,"invisible":false,"provenanceaction":{"classid":"result:organization:instrepo","classname":"Propagation of affiliation to result collected from datasources of type institutional repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.85"},"properties":[],"relClass":"hasAuthorInstitution","relType":"resultOrganization","source":"50|dedup_wf_001::12206bf78aabd7d52132477182d19147","subRelType":"affiliation","target":"20|pending_org_::c522a7c935f9fd9578122e60eeec282c","validated":false} \ No newline at end of file From 189a7c255a52127f79db59d7cde171f216d76152 Mon Sep 17 00:00:00 2001 From: Miriam Baglioni Date: Mon, 25 Nov 2024 16:52:13 +0100 Subject: [PATCH 36/73] [patents] added test and resources --- .../PrepareResultCommunitySetStep1.java | 11 ----------- .../ResultToCommunityJobTest.java | 2 ++ 2 files changed, 2 insertions(+), 11 deletions(-) diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep1.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep1.java index 8f23a4cfd..5af2bf481 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep1.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep1.java @@ -173,24 +173,13 @@ public class PrepareResultCommunitySetStep1 { "AND lower(co.id) IN " + communityIdList, "AND lower(relClass) IN " + allowedsemrel); Dataset result_context = spark.sql(resultContextQuery); - //result_context.createOrReplaceTempView("result_context"); - -// spark -// .sql(RESULT_COMMUNITY_LIST_QUERY) -// .as(Encoders.bean(ResultCommunityList.class)) -// .write() -// .option("compression", "gzip") -// .mode(SaveMode.Overwrite) -// .json(outputResultPath); Dataset rwc = spark.sql(String.format(RESULT_WITH_CONTEXT, communityIdList)); Dataset patents = spark.sql(RESULT_PATENT); Dataset relatedToRelations = spark.sql(IS_RELATED_TO_RELATIONS); rwc.createOrReplaceTempView("resultWithContext"); - patents.createOrReplaceTempView("patents"); - relatedToRelations.createOrReplaceTempView("relatedTorelations"); diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/ResultToCommunityJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/ResultToCommunityJobTest.java index 4361b6f39..c1fcff4d9 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/ResultToCommunityJobTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/ResultToCommunityJobTest.java @@ -326,4 +326,6 @@ public class ResultToCommunityJobTest { Assertions.assertEquals(1, communities.size()); Assertions.assertEquals("dh-ch", communities.get(0)); } + + } From 2e54715d715426cd66cca94f1b63fe29abbcc8ce Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 22 Nov 2024 10:05:56 +0100 Subject: [PATCH 37/73] Applying PR#512 - Sequential ActionSet promotion --- .../wf/main/oozie_app/workflow.xml | 47 ++++--------------- 1 file changed, 9 insertions(+), 38 deletions(-) diff --git a/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/main/oozie_app/workflow.xml 
b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/main/oozie_app/workflow.xml index 7ccfb342e..cdb4de908 100644 --- a/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/main/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/main/oozie_app/workflow.xml @@ -135,22 +135,10 @@ --outputPath${workingDir}/action_payload_by_type --isLookupUrl${isLookupUrl} - +
- - - - - - - - - - - - ${wf:appPath()}/promote_action_payload_for_dataset_table @@ -162,7 +150,7 @@ - + @@ -177,7 +165,7 @@ - +
@@ -192,7 +180,7 @@ - + @@ -207,7 +195,7 @@ - + @@ -222,7 +210,7 @@ - + @@ -237,7 +225,7 @@ - + @@ -252,7 +240,7 @@ - + @@ -267,26 +255,9 @@ - + - - - ${wf:appPath()}/promote_action_payload_for_person_table - - - - inputActionPayloadRootPath - ${workingDir}/action_payload_by_type - - - - - - - - -
\ No newline at end of file

From ca2d480df372b174d92f141f38cdf14b19130979 Mon Sep 17 00:00:00 2001
From: Miriam Baglioni
Date: Tue, 26 Nov 2024 13:36:27 +0100
Subject: [PATCH 38/73] [BulkTagging] added fix to handle the case in which the set of constraints for the datasource is empty. Added checks for remove constraints and advanced constraints to verify whether the constraints list is empty and, in that case, do nothing

---
 .../dhp/bulktag/community/ResultTagger.java     | 15 +++++++++------
 .../bulktag/community/SelectionConstraints.java |  2 ++
 2 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java
index 2ea229e3e..64cbd70ba 100644
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java
@@ -130,6 +130,7 @@ public class ResultTagger implements Serializable {
 			// log.info("Remove constraints for " + communityId);
 			if (conf.getRemoveConstraintsMap().keySet().contains(communityId) &&
 				conf.getRemoveConstraintsMap().get(communityId).getCriteria() != null &&
+				!conf.getRemoveConstraintsMap().get(communityId).getCriteria().isEmpty() &&
 				conf
 					.getRemoveConstraintsMap()
 					.get(communityId)
@@ -161,29 +162,30 @@ public class ResultTagger implements Serializable {

 		// Tagging for datasource
 		final Set<String> datasources = new HashSet<>();
-		final Set<String> collfrom = new HashSet<>();
+		final Set<String> cfhb = new HashSet<>();
 		final Set<String> hostdby = new HashSet<>();

 		if (Objects.nonNull(result.getInstance())) {
 			for (Instance i : result.getInstance()) {
 				if (Objects.nonNull(i.getCollectedfrom()) && Objects.nonNull(i.getCollectedfrom().getKey())) {
-					collfrom.add(i.getCollectedfrom().getKey());
+					cfhb.add(i.getCollectedfrom().getKey());
 				}
 				if (Objects.nonNull(i.getHostedby()) && Objects.nonNull(i.getHostedby().getKey())) {
+					cfhb.add(i.getHostedby().getKey());
 					hostdby.add(i.getHostedby().getKey());
 				}
 			}

-			collfrom
+			cfhb
 				.forEach(
 					dsId -> datasources
 						.addAll(
 							conf.getCommunityForDatasource(dsId, param)));
 			hostdby.forEach(dsId -> {
-				datasources
-					.addAll(
-						conf.getCommunityForDatasource(dsId, param));
+//				datasources
+//					.addAll(
+//						conf.getCommunityForDatasource(dsId, param));
 				if (conf.isEoscDatasource(dsId)) {
 					datasources.add("eosc");
 				}
@@ -226,6 +228,7 @@ public class ResultTagger implements Serializable {
 			.forEach(communityId -> {
 				if (!removeCommunities.contains(communityId) &&
 					conf.getSelectionConstraintsMap().get(communityId).getCriteria() != null &&
+					!conf.getSelectionConstraintsMap().get(communityId).getCriteria().isEmpty() &&
 					conf
 						.getSelectionConstraintsMap()
 						.get(communityId)
diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/SelectionConstraints.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/SelectionConstraints.java
index 57cc658fc..8a23a7017 100644
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/SelectionConstraints.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/SelectionConstraints.java
@@ -33,6 +33,8 @@ public class SelectionConstraints implements Serializable {

 	// Constraints in or
 	public boolean verifyCriteria(final Map<String, List<String>> param) {
+		if (criteria.isEmpty())
+			return true;
 		for (Constraints selc : criteria) {
 			if (selc.verifyCriteria(param)) {
 				return true;

From 0517e452e34cf30d471552f5930983a9435a91c5 Mon Sep 17 00:00:00 2001
From: "sandro.labruzzo"
Date: Mon, 2 Dec 2024 14:00:59 +0100
Subject: [PATCH 39/73] Fixed error on empty affiliation

---
 .../main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMAffiliation.java | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMAffiliation.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMAffiliation.java
index 54aba8715..a8dacd132 100644
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMAffiliation.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMAffiliation.java
@@ -23,9 +23,8 @@ public class PMAffiliation {
 		return name;
 	}

-	public PMAffiliation setName(String name) {
+	public void setName(String name) {
 		this.name = name;
-		return this;
 	}

 	public PMIdentifier getIdentifier() {

From cc6bbbb80474553a4d3e71bc0fa80c4b84035194 Mon Sep 17 00:00:00 2001
From: "sandro.labruzzo"
Date: Tue, 3 Dec 2024 14:31:11 +0100
Subject: [PATCH 40/73] make setter void

---
 .../main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMAffiliation.java | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMAffiliation.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMAffiliation.java
index a8dacd132..e3dcc95dd 100644
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMAffiliation.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMAffiliation.java
@@ -31,8 +31,7 @@ public class PMAffiliation {
 		return identifier;
 	}

-	public PMAffiliation setIdentifier(PMIdentifier identifier) {
+	public void setIdentifier(PMIdentifier identifier) {
 		this.identifier = identifier;
-		return this;
 	}
 }

From 65902a87e3ae7b161cd8e682f4a3535df996e3cd Mon Sep 17 00:00:00 2001
From: "michele.artini"
Date: Wed, 4 Dec 2024 13:18:17 +0100
Subject: [PATCH 41/73] support for the new APIs

---
 .../plugin/researchfi/ResearchFiIterator.java | 54 +++++++++----------
 1 file changed, 27 insertions(+), 27 deletions(-)

diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/researchfi/ResearchFiIterator.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/researchfi/ResearchFiIterator.java
index 269a89f71..cad499962 100644
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/researchfi/ResearchFiIterator.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/researchfi/ResearchFiIterator.java
@@ -6,7 +6,7 @@ import java.util.Queue;
 import java.util.concurrent.PriorityBlockingQueue;

 import org.apache.commons.io.IOUtils;
-import org.apache.commons.lang3.math.NumberUtils;
+import org.apache.commons.lang3.StringUtils;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.http.Header;
@@ -27,25 +27,25 @@ public class ResearchFiIterator implements Iterator<String> {

 	private final String baseUrl;
 	private final String authToken;
-	private int currPage;
-	private int nPages;
+	private String nextUrl;
+	private int nCalls = 0;

 	private final Queue<String> queue = new PriorityBlockingQueue<>();

 	public ResearchFiIterator(final String baseUrl, final String authToken) {
 		this.baseUrl = baseUrl;
 		this.authToken = authToken;
-		this.currPage = 0;
-		this.nPages = 0;
+		this.nextUrl = null;
 	}

 	private void verifyStarted() {
-		if (this.currPage == 0) {
-			try {
-				nextCall();
-			} catch (final CollectorException e) {
-				throw new IllegalStateException(e);
+
+		try {
+			if (this.nCalls == 0) {
+				this.nextUrl = invokeUrl(this.baseUrl);
 			}
+		} catch (final CollectorException e) {
+			throw new IllegalStateException(e);
 		}
 	}
@@ -62,9 +62,9 @@ public class ResearchFiIterator implements Iterator<String> {
 		synchronized (this.queue) {
 			verifyStarted();
 			final String res = this.queue.poll();
-			while (this.queue.isEmpty() && (this.currPage < this.nPages)) {
+			while (this.queue.isEmpty() && StringUtils.isNotBlank(this.nextUrl)) {
 				try {
-					nextCall();
+					this.nextUrl = invokeUrl(this.nextUrl);
 				} catch (final CollectorException e) {
 					throw new IllegalStateException(e);
 				}
@@ -73,18 +73,11 @@
 		}
 	}

-	private void nextCall() throws CollectorException {
+	private String invokeUrl(final String url) throws CollectorException {

-		this.currPage += 1;
+		this.nCalls += 1;
+		String next = null;

-		final String url;
-		if (!this.baseUrl.contains("?")) {
-			url = String.format("%s?PageNumber=%d&PageSize=%d", this.baseUrl, this.currPage, PAGE_SIZE);
-		} else if (!this.baseUrl.contains("PageSize=")) {
-			url = String.format("%s&PageNumber=%d&PageSize=%d", this.baseUrl, this.currPage, PAGE_SIZE);
-		} else {
-			url = String.format("%s&PageNumber=%d", this.baseUrl, this.currPage);
-		}
 		log.info("Calling url: " + url);

 		try (final CloseableHttpClient client = HttpClients.createDefault()) {
@@ -94,11 +87,15 @@
 			try (final CloseableHttpResponse response = client.execute(req)) {
 				for (final Header header : response.getAllHeaders()) {
 					log.debug("HEADER: " + header.getName() + " = " + header.getValue());
-					if ("x-page-count".equals(header.getName())) {
-						final int totalPages = NumberUtils.toInt(header.getValue());
-						if (this.nPages != totalPages) {
-							this.nPages = NumberUtils.toInt(header.getValue());
-							log.info("Total pages: " + totalPages);
+					if ("link".equals(header.getName())) {
+						final String s = StringUtils.substringBetween(header.getValue(), "<", ">");
+						final String token = StringUtils
+							.substringBefore(StringUtils.substringAfter(s, "NextPageToken="), "&");
+
+						if (this.baseUrl.contains("?")) {
+							next = this.baseUrl + "&NextPageToken=" + token;
+						} else {
+							next = this.baseUrl + "?NextPageToken=" + token;
 						}
 					}
 				}
@@ -108,6 +105,9 @@
 				jsonArray.forEach(obj -> this.queue.add(JsonUtils.convertToXML(obj.toString())));
 			}
+
+			return next;
+
 		} catch (final Throwable e) {
 			log.warn("Error calling url: " + url, e);
 			throw new CollectorException("Error calling url: " + url, e);

From 32e2a8b34053dc8162bf20bb91c977d544d05b94 Mon Sep 17 00:00:00 2001
From: "sandro.labruzzo"
Date: Wed, 4 Dec 2024 13:36:21 +0100
Subject: [PATCH 42/73] implemented zenodo dump collector plugin

---
 .../CollectZenodoDumpCollectorPlugin.java  | 96 ++++++++++++++++++
 .../plugin/zenodo/ZenodoTarIterator.java   | 59 +++++++++++
 .../zenodo/ZenodoPluginCollectionTest.java | 35 +++++++
 .../dhp/collection/zenodo/zenodo.tar.gz    | Bin 0 -> 7412 bytes
 4 files changed, 190 insertions(+)
 create mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/zenodo/CollectZenodoDumpCollectorPlugin.java
 create mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/zenodo/ZenodoTarIterator.java
 create mode 100644 dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/zenodo/ZenodoPluginCollectionTest.java
 create mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/zenodo/zenodo.tar.gz

diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/zenodo/CollectZenodoDumpCollectorPlugin.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/zenodo/CollectZenodoDumpCollectorPlugin.java
new file mode 100644
index 000000000..3ea29a9b0
--- /dev/null
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/zenodo/CollectZenodoDumpCollectorPlugin.java
@@ -0,0 +1,96 @@
+
+package eu.dnetlib.dhp.collection.plugin.zenodo;
+
+import static eu.dnetlib.dhp.utils.DHPUtils.getHadoopConfiguration;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.stream.Stream;
+import java.util.stream.StreamSupport;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.compress.CompressionCodec;
+import org.apache.hadoop.io.compress.CompressionCodecFactory;
+import org.apache.http.client.config.RequestConfig;
+import org.apache.http.client.methods.CloseableHttpResponse;
+import org.apache.http.client.methods.HttpGet;
+import org.apache.http.impl.client.CloseableHttpClient;
+import org.apache.http.impl.client.HttpClientBuilder;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import eu.dnetlib.dhp.collection.ApiDescriptor;
+import eu.dnetlib.dhp.collection.plugin.CollectorPlugin;
+import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
+import eu.dnetlib.dhp.common.collection.CollectorException;
+
+public class CollectZenodoDumpCollectorPlugin implements CollectorPlugin {
+
+	final private Logger log = LoggerFactory.getLogger(getClass());
+
+	private void downloadItem(final String name, final String itemURL, final String basePath,
+		final FileSystem fileSystem) {
+		try {
+			final Path hdfsWritePath = new Path(String.format("%s/%s", basePath, name));
+			final FSDataOutputStream fsDataOutputStream = fileSystem.create(hdfsWritePath, true);
+			final HttpGet request = new HttpGet(itemURL);
+			final int timeout = 60; // seconds
+			final RequestConfig config = RequestConfig
+				.custom()
+				.setConnectTimeout(timeout * 1000)
+				.setConnectionRequestTimeout(timeout * 1000)
+				.setSocketTimeout(timeout * 1000)
+				.build();
+			log.info("Downloading url {} into {}", itemURL, hdfsWritePath.getName());
+			try (CloseableHttpClient client = HttpClientBuilder.create().setDefaultRequestConfig(config).build();
+				CloseableHttpResponse response = client.execute(request)) {
+				int responseCode = response.getStatusLine().getStatusCode();
+				log.info("Response code is {}", responseCode);
+				if (responseCode >= 200 && responseCode < 400) {
+					IOUtils.copy(response.getEntity().getContent(), fsDataOutputStream);
+				}
+			} catch (Throwable eu) {
+				throw new RuntimeException(eu);
+			}
+		} catch (Throwable e) {
+			throw new RuntimeException(e);
+		}
+	}
+
+	@Override
+	public Stream<String> collect(ApiDescriptor api, AggregatorReport report) throws CollectorException {
CollectorException { + try { + final String zenodoURL = api.getBaseUrl(); + final String hdfsURI = api.getParams().get("hdfsURI"); + final FileSystem fileSystem = FileSystem.get(getHadoopConfiguration(hdfsURI)); + downloadItem("zenodoDump.tar.gz", zenodoURL, "/tmp", fileSystem); + CompressionCodecFactory factory = new CompressionCodecFactory(fileSystem.getConf()); + + Path sourcePath = new Path("/tmp/zenodoDump.tar.gz"); + CompressionCodec codec = factory.getCodec(sourcePath); + InputStream gzipInputStream = null; + try { + gzipInputStream = codec.createInputStream(fileSystem.open(sourcePath)); + return iterateTar(gzipInputStream); + + } catch (IOException e) { + throw new CollectorException(e); + } finally { + log.info("Closing gzip stream"); + org.apache.hadoop.io.IOUtils.closeStream(gzipInputStream); + } + } catch (Exception e) { + throw new CollectorException(e); + } + } + + private Stream iterateTar(InputStream gzipInputStream) throws Exception { + + Iterable iterable = () -> new ZenodoTarIterator(gzipInputStream); + return StreamSupport.stream(iterable.spliterator(), false); + + } +} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/zenodo/ZenodoTarIterator.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/zenodo/ZenodoTarIterator.java new file mode 100644 index 000000000..8e627683e --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/zenodo/ZenodoTarIterator.java @@ -0,0 +1,59 @@ + +package eu.dnetlib.dhp.collection.plugin.zenodo; + +import java.io.Closeable; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.util.Iterator; + +import org.apache.commons.compress.archivers.tar.TarArchiveEntry; +import org.apache.commons.compress.archivers.tar.TarArchiveInputStream; +import org.apache.commons.io.IOUtils; + +public class ZenodoTarIterator implements Iterator, Closeable { + + private final InputStream gzipInputStream; + private final StringBuilder currentItem = new StringBuilder(); + private TarArchiveInputStream tais; + private boolean hasNext; + + public ZenodoTarIterator(InputStream gzipInputStream) { + this.gzipInputStream = gzipInputStream; + tais = new TarArchiveInputStream(gzipInputStream); + hasNext = getNextItem(); + } + + private boolean getNextItem() { + try { + TarArchiveEntry entry; + while ((entry = tais.getNextTarEntry()) != null) { + if (entry.isFile()) { + currentItem.setLength(0); + currentItem.append(IOUtils.toString(new InputStreamReader(tais))); + return true; + } + } + return false; + } catch (Throwable e) { + throw new RuntimeException(e); + } + } + + @Override + public boolean hasNext() { + return hasNext; + } + + @Override + public String next() { + final String data = currentItem.toString(); + hasNext = getNextItem(); + return data; + } + + @Override + public void close() throws IOException { + gzipInputStream.close(); + } +} diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/zenodo/ZenodoPluginCollectionTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/zenodo/ZenodoPluginCollectionTest.java new file mode 100644 index 000000000..9b5cf1850 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/zenodo/ZenodoPluginCollectionTest.java @@ -0,0 +1,35 @@ + +package eu.dnetlib.dhp.collection.plugin.zenodo; + +import static org.junit.jupiter.api.Assertions.assertNotNull; + +import 
java.util.zip.GZIPInputStream; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.collection.ApiDescriptor; +import eu.dnetlib.dhp.common.collection.CollectorException; + +public class ZenodoPluginCollectionTest { + + @Test + public void testZenodoIterator() throws Exception { + + final GZIPInputStream gis = new GZIPInputStream( + getClass().getResourceAsStream("/eu/dnetlib/dhp/collection/zenodo/zenodo.tar.gz")); + try (ZenodoTarIterator it = new ZenodoTarIterator(gis)) { + Assertions.assertTrue(it.hasNext()); + int i = 0; + while (it.hasNext()) { + Assertions.assertNotNull(it.next()); + i++; + } + Assertions.assertEquals(10, i); + + } + } + +} diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/zenodo/zenodo.tar.gz b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/zenodo/zenodo.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6c06bf4e5a5d568aa6c534cc4636796d9938f97e GIT binary patch literal 7412 [base85-encoded binary payload omitted: 7412-byte zenodo.tar.gz test fixture]
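Note for reviewers: the new iterator is easy to exercise outside the unit test as well. A minimal sketch, assuming a locally downloaded dump at a hypothetical path (the plugin itself opens the same bytes from HDFS through a Hadoop CompressionCodec instead):

import java.io.FileInputStream;
import java.util.zip.GZIPInputStream;

import eu.dnetlib.dhp.collection.plugin.zenodo.ZenodoTarIterator;

public class ZenodoDumpPreview {
	public static void main(String[] args) throws Exception {
		// /tmp/zenodoDump.tar.gz is a placeholder path for a locally downloaded dump
		try (ZenodoTarIterator it = new ZenodoTarIterator(
			new GZIPInputStream(new FileInputStream("/tmp/zenodoDump.tar.gz")))) {
			int n = 0;
			while (it.hasNext()) {
				final String record = it.next(); // full content of one file inside the tar
				System.out.println("record " + (++n) + ": " + record.length() + " chars");
			}
		}
	}
}

Since ZenodoTarIterator implements both Iterator<String> and Closeable, try-with-resources is enough to release the underlying stream.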
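Circling back to the ResearchFi iterator rework earlier in this series: pagination now follows the HTTP Link response header instead of the old x-page-count counter. A minimal sketch of the token extraction it performs, with an invented sample header value (the real value comes from the ResearchFi API response):

import org.apache.commons.lang3.StringUtils;

public class LinkHeaderTokenDemo {
	public static void main(String[] args) {
		// invented example of a Link header carrying the next page, assumed RFC 5988 style
		final String linkValue = "<https://example.org/api/items?NextPageToken=abc123&PageSize=100>; rel=\"next\"";
		final String url = StringUtils.substringBetween(linkValue, "<", ">");
		final String token = StringUtils.substringBefore(StringUtils.substringAfter(url, "NextPageToken="), "&");
		System.out.println(token); // prints abc123
	}
}

The iterator then re-appends the extracted token to its own baseUrl, which keeps any caller-supplied query parameters intact.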
Date: Wed, 4 Dec 2024 13:37:14 +0100 Subject: [PATCH 43/73] code formatted --- .../dhp/sx/bio/pubmed/PMAffiliation.java | 42 ++++++----- .../dnetlib/dhp/sx/bio/pubmed/PMAuthor.java | 1 - .../dhp/sx/bio/pubmed/PMIdentifier.java | 74 +++++++++---------- .../collection/crossref/Crossref2Oaf.scala | 1 - .../dnetlib/dhp/sx/bio/pubmed/PMParser2.scala | 13 ++-- .../dhp/sx/bio/pubmed/PubMedToOaf.scala | 20 ++++- .../dnetlib/dhp/sx/bio/BioScholixTest.scala | 1 - 7 files changed, 82 insertions(+), 70 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMAffiliation.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMAffiliation.java index e3dcc95dd..427eb2725 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMAffiliation.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMAffiliation.java @@ -1,3 +1,4 @@ + package eu.dnetlib.dhp.sx.bio.pubmed; /** @@ -7,31 +8,32 @@ package eu.dnetlib.dhp.sx.bio.pubmed; */ public class PMAffiliation { - private String name; + private String name; - private PMIdentifier identifier; + private PMIdentifier identifier; - public PMAffiliation() { + public PMAffiliation() { - } - public PMAffiliation(String name, PMIdentifier identifier) { - this.name = name; - this.identifier = identifier; - } + } + public PMAffiliation(String name, PMIdentifier identifier) { + this.name = name; + this.identifier = identifier; + } - public String getName() { - return name; - } + public String getName() { + return name; + } - public void setName(String name) { - this.name = name; - } + public void setName(String name) { + this.name = name; + } - public PMIdentifier getIdentifier() { - return identifier; - } + public PMIdentifier getIdentifier() { + return identifier; + } - public void setIdentifier(PMIdentifier identifier) { - this.identifier = identifier; - } + public void setIdentifier(PMIdentifier identifier) { + this.identifier = identifier; + } } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMAuthor.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMAuthor.java index b0df25663..e023f2e62 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMAuthor.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMAuthor.java @@ -97,5 +97,4 @@ public class PMAuthor
implements Serializable { this.affiliation = affiliation; } - } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMIdentifier.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMIdentifier.java index 0c8c55e40..6cd17a90c 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMIdentifier.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMIdentifier.java @@ -1,53 +1,53 @@ + package eu.dnetlib.dhp.sx.bio.pubmed; public class PMIdentifier { - private String pid; - private String type; + private String pid; + private String type; + public PMIdentifier(String pid, String type) { + this.pid = cleanPid(pid); + this.type = type; + } - public PMIdentifier(String pid, String type) { - this.pid = cleanPid(pid); - this.type = type; - } + public PMIdentifier() { - public PMIdentifier() { + } - } + private String cleanPid(String pid) { - private String cleanPid(String pid) { + if (pid == null) { + return null; + } - if (pid == null) { - return null; - } + // clean ORCID ID in the form 0000000163025705 to 0000-0001-6302-5705 + if (pid.matches("[0-9]{15}[0-9X]")) { + return pid.replaceAll("(.{4})(.{4})(.{4})(.{4})", "$1-$2-$3-$4"); + } - // clean ORCID ID in the form 0000000163025705 to 0000-0001-6302-5705 - if (pid.matches("[0-9]{15}[0-9X]")) { - return pid.replaceAll("(.{4})(.{4})(.{4})(.{4})", "$1-$2-$3-$4"); - } + // clean ORCID in the form http://orcid.org/0000-0001-8567-3543 to 0000-0001-8567-3543 + if (pid.matches("http://orcid.org/[0-9]{4}-[0-9]{4}-[0-9]{4}-[0-9]{4}")) { + return pid.replaceAll("http://orcid.org/", ""); + } + return pid; + } - // clean ORCID in the form http://orcid.org/0000-0001-8567-3543 to 0000-0001-8567-3543 - if (pid.matches("http://orcid.org/[0-9]{4}-[0-9]{4}-[0-9]{4}-[0-9]{4}")) { - return pid.replaceAll("http://orcid.org/", ""); - } - return pid; - } + public String getPid() { + return pid; + } - public String getPid() { - return pid; - } + public PMIdentifier setPid(String pid) { + this.pid = cleanPid(pid); + return this; + } - public PMIdentifier setPid(String pid) { - this.pid = cleanPid(pid); - return this; - } + public String getType() { + return type; + } - public String getType() { - return type; - } - - public PMIdentifier setType(String type) { - this.type = type; - return this; - } + public PMIdentifier setType(String type) { + this.type = type; + return this; + } } diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/collection/crossref/Crossref2Oaf.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/collection/crossref/Crossref2Oaf.scala index e4a238c8f..de68ebb58 100644 --- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/collection/crossref/Crossref2Oaf.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/collection/crossref/Crossref2Oaf.scala @@ -673,7 +673,6 @@ case object Crossref2Oaf { val doi = input.getString(0) val rorId = input.getString(1) - val pubId = IdentifierFactory.idFromPid("50", "doi", DoiCleaningRule.clean(doi), true) val affId = GenerateRorActionSetJob.calculateOpenaireId(rorId) diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/pubmed/PMParser2.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/pubmed/PMParser2.scala index 2eb4bea65..bc9a2cf02 100644 --- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/pubmed/PMParser2.scala +++ 
b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/pubmed/PMParser2.scala @@ -82,21 +82,22 @@ class PMParser2 { a.setLastName((author \ "LastName").text) a.setForeName((author \ "ForeName").text) val id = (author \ "Identifier").text - val idType =(author \ "Identifier" \ "@Source").text + val idType = (author \ "Identifier" \ "@Source").text - if(id != null && id.nonEmpty && idType != null && idType.nonEmpty) { + if (id != null && id.nonEmpty && idType != null && idType.nonEmpty) { a.setIdentifier(new PMIdentifier(id, idType)) } - val affiliation = (author \ "AffiliationInfo" \ "Affiliation").text - val affiliationId = (author \ "AffiliationInfo" \ "Identifier").text + val affiliationId = (author \ "AffiliationInfo" \ "Identifier").text val affiliationIdType = (author \ "AffiliationInfo" \ "Identifier" \ "@Source").text - if(affiliation != null && affiliation.nonEmpty) { + if (affiliation != null && affiliation.nonEmpty) { val aff = new PMAffiliation() aff.setName(affiliation) - if(affiliationId != null && affiliationId.nonEmpty && affiliationIdType != null && affiliationIdType.nonEmpty) { + if ( + affiliationId != null && affiliationId.nonEmpty && affiliationIdType != null && affiliationIdType.nonEmpty + ) { aff.setIdentifier(new PMIdentifier(affiliationId, affiliationIdType)) } a.setAffiliation(aff) diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/pubmed/PubMedToOaf.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/pubmed/PubMedToOaf.scala index 5e14c731a..281ca0e07 100644 --- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/pubmed/PubMedToOaf.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/pubmed/PubMedToOaf.scala @@ -294,11 +294,23 @@ object PubMedToOaf { author.setName(a.getForeName) author.setSurname(a.getLastName) author.setFullname(a.getFullName) - if(a.getIdentifier != null) { - author.setPid(List(OafMapperUtils.structuredProperty(a.getIdentifier.getPid, - OafMapperUtils.qualifier(a.getIdentifier.getType,a.getIdentifier.getType,ModelConstants.DNET_PID_TYPES, ModelConstants.DNET_PID_TYPES), dataInfo)).asJava) + if (a.getIdentifier != null) { + author.setPid( + List( + OafMapperUtils.structuredProperty( + a.getIdentifier.getPid, + OafMapperUtils.qualifier( + a.getIdentifier.getType, + a.getIdentifier.getType, + ModelConstants.DNET_PID_TYPES, + ModelConstants.DNET_PID_TYPES + ), + dataInfo + ) + ).asJava + ) } - if (a.getAffiliation!= null) + if (a.getAffiliation != null) author.setRawAffiliationString(List(a.getAffiliation.getName).asJava) author.setRank(index + 1) author diff --git a/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/sx/bio/BioScholixTest.scala b/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/sx/bio/BioScholixTest.scala index 4a926df01..cb7826dbf 100644 --- a/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/sx/bio/BioScholixTest.scala +++ b/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/sx/bio/BioScholixTest.scala @@ -63,7 +63,6 @@ class BioScholixTest extends AbstractVocabularyTest { "0000000333457333", "0000000335964515", "0000000302921949", - "http://orcid.org/0000-0001-8567-3543", "http://orcid.org/0000-0001-7868-8528", "0000-0001-9189-1440", From 730a7751b68b0dc2c5a8fa1f97ee4e82bdb2b3f2 Mon Sep 17 00:00:00 2001 From: "sandro.labruzzo" Date: Wed, 4 Dec 2024 15:03:59 +0100 Subject: [PATCH 44/73] added zenodoDump to enum of CollectorPlugin --- 
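Note: with the enum value in place, the plugin can be driven end to end through the standard descriptor. A minimal sketch, assuming the usual ApiDescriptor setters and a no-argument AggregatorReport constructor; the dump URL and the hdfsURI value are placeholders:

import java.util.HashMap;
import java.util.Map;
import java.util.stream.Stream;

import eu.dnetlib.dhp.collection.ApiDescriptor;
import eu.dnetlib.dhp.collection.plugin.zenodo.CollectZenodoDumpCollectorPlugin;
import eu.dnetlib.dhp.common.aggregation.AggregatorReport;

public class ZenodoDumpSmokeTest {
	public static void main(String[] args) throws Exception {
		final ApiDescriptor api = new ApiDescriptor();
		api.setProtocol("zenodoDump"); // must match CollectorPlugin.NAME.zenodoDump
		api.setBaseUrl("https://zenodo.org/some/dump.tar.gz"); // placeholder dump location
		final Map<String, String> params = new HashMap<>();
		params.put("hdfsURI", "hdfs://nameservice1"); // placeholder, cluster specific
		api.setParams(params);

		final Stream<String> records = new CollectZenodoDumpCollectorPlugin()
			.collect(api, new AggregatorReport());
		records.limit(3).forEach(System.out::println); // peek at the first records
	}
}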
.../main/java/eu/dnetlib/dhp/collection/CollectorWorker.java | 3 +++ .../java/eu/dnetlib/dhp/collection/plugin/CollectorPlugin.java | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorWorker.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorWorker.java index f63bfcb48..4c6d0653e 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorWorker.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorWorker.java @@ -7,6 +7,7 @@ import java.io.IOException; import java.util.Optional; import java.util.concurrent.atomic.AtomicInteger; +import eu.dnetlib.dhp.collection.plugin.zenodo.CollectZenodoDumpCollectorPlugin; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IntWritable; @@ -129,6 +130,8 @@ public class CollectorWorker extends ReportingJob { return new Gtr2PublicationsCollectorPlugin(this.clientParams); case osfPreprints: return new OsfPreprintsCollectorPlugin(this.clientParams); + case zenodoDump: + return new CollectZenodoDumpCollectorPlugin(); case other: final CollectorPlugin.NAME.OTHER_NAME plugin = Optional .ofNullable(this.api.getParams().get("other_plugin_type")) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/CollectorPlugin.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/CollectorPlugin.java index 72e691579..93e65b6a7 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/CollectorPlugin.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/CollectorPlugin.java @@ -11,7 +11,7 @@ public interface CollectorPlugin { enum NAME { - oai, other, rest_json2xml, file, fileGzip, baseDump, gtr2Publications, osfPreprints; + oai, other, rest_json2xml, file, fileGzip, baseDump, gtr2Publications, osfPreprints, zenodoDump; public enum OTHER_NAME { mdstore_mongodb_dump, mdstore_mongodb From bde59a7c8f49cd964317a9e240dacb3be1eeec01 Mon Sep 17 00:00:00 2001 From: miconis Date: Thu, 5 Dec 2024 11:09:30 +0100 Subject: [PATCH 45/73] implementation of the utilities for the inclusion of raids in the graph --- .../java/eu/dnetlib/dhp/common/Constants.java | 4 + .../dnetlib/dhp/actionmanager/Constants.java | 8 + .../raid/GenerateRAiDActionSetJob.java | 190 ++++++++++++++++++ .../raid/model/GenerateRAiDActionSetJob.java | 2 + .../actionmanager/raid/model/RAiDEntity.java | 102 ++++++++++ .../ror/GenerateRorActionSetJob.java | 8 +- .../raid/action_set_parameters.json | 14 ++ .../raid/oozie_app/action_set_parameters.json | 0 .../raid/oozie_app/config-default.xml | 58 ++++++ .../actionmanager/raid/oozie_app/workflow.xml | 55 +++++ .../raid/GenerateRAiDActionSetJobTest.java | 112 +++++++++++ .../dhp/actionmanager/raid/raid_example.json | 6 + 12 files changed, 552 insertions(+), 7 deletions(-) create mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/raid/GenerateRAiDActionSetJob.java create mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/raid/model/GenerateRAiDActionSetJob.java create mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/raid/model/RAiDEntity.java create mode 100644 dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/raid/action_set_parameters.json create mode 100644 
dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/raid/oozie_app/action_set_parameters.json create mode 100644 dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/raid/oozie_app/config-default.xml create mode 100644 dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/raid/oozie_app/workflow.xml create mode 100644 dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/raid/GenerateRAiDActionSetJobTest.java create mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/raid/raid_example.json diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/Constants.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/Constants.java index 0477d6399..b00199ea5 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/Constants.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/Constants.java @@ -10,6 +10,10 @@ public class Constants { public static final Map accessRightsCoarMap = Maps.newHashMap(); public static final Map coarCodeLabelMap = Maps.newHashMap(); + public static final String RAID_NS_PREFIX = "raid________"; + public static final String RAID_DATASOURCE_NAME = "Research Activity Identifier Service (RAiD)"; + public static final String RAID_OPENAIRE_ID = ""; + public static final String ROR_NS_PREFIX = "ror_________"; public static final String ROR_OPENAIRE_ID = "10|openaire____::993a7ae7a863813cf95028b50708e222"; diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/Constants.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/Constants.java index 73b4b77cb..722415c2e 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/Constants.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/Constants.java @@ -3,6 +3,8 @@ package eu.dnetlib.dhp.actionmanager; import java.util.Optional; +import eu.dnetlib.dhp.schema.oaf.Instance; +import eu.dnetlib.dhp.schema.oaf.Qualifier; import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; @@ -110,6 +112,12 @@ public class Constants { } + public static Instance getInstance(Qualifier qualifier) { + Instance instance = new Instance(); + instance.setInstancetype(qualifier); + return instance; + } + public static void removeOutputDir(SparkSession spark, String path) { HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration()); } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/raid/GenerateRAiDActionSetJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/raid/GenerateRAiDActionSetJob.java new file mode 100644 index 000000000..8e5e1bdcb --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/raid/GenerateRAiDActionSetJob.java @@ -0,0 +1,190 @@ +package eu.dnetlib.dhp.actionmanager.raid; + +import com.fasterxml.jackson.databind.ObjectMapper; +import eu.dnetlib.dhp.actionmanager.raid.model.RAiDEntity; +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.common.Constants; +import eu.dnetlib.dhp.common.HdfsSupport; +import eu.dnetlib.dhp.schema.action.AtomicAction; +import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; +import eu.dnetlib.dhp.utils.DHPUtils; +import org.apache.commons.io.IOUtils; 
+import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapred.SequenceFileOutputFormat; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.SparkSession; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import scala.Tuple2; + +import java.util.*; +import java.util.stream.Collectors; + +import static eu.dnetlib.dhp.actionmanager.personentity.ExtractPerson.OPENAIRE_DATASOURCE_ID; +import static eu.dnetlib.dhp.actionmanager.personentity.ExtractPerson.OPENAIRE_DATASOURCE_NAME; +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; +import static eu.dnetlib.dhp.schema.common.ModelConstants.*; +import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.*; + +public class GenerateRAiDActionSetJob { + + private static final Logger log = LoggerFactory.getLogger(eu.dnetlib.dhp.actionmanager.raid.GenerateRAiDActionSetJob.class); + + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + private static final List RAID_COLLECTED_FROM = listKeyValues( + OPENAIRE_DATASOURCE_ID, OPENAIRE_DATASOURCE_NAME); + + private static final Qualifier RAID_QUALIFIER = qualifier("raid:openaireinference", "raid:openaireinference", DNET_PROVENANCE_ACTIONS, DNET_PROVENANCE_ACTIONS); + + private static final DataInfo RAID_DATA_INFO = dataInfo( + false, OPENAIRE_DATASOURCE_NAME, true, false, RAID_QUALIFIER, "0.92"); + + public static void main(final String[] args) throws Exception { + + final String jsonConfiguration = IOUtils + .toString( + eu.dnetlib.dhp.actionmanager.raid.GenerateRAiDActionSetJob.class + .getResourceAsStream("/eu/dnetlib/dhp/actionmanager/raid/action_set_parameters.json")); + + final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); + + parser.parseArgument(args); + + final Boolean isSparkSessionManaged = Optional + .ofNullable(parser.get("isSparkSessionManaged")) + .map(Boolean::valueOf) + .orElse(Boolean.TRUE); + + log.info("isSparkSessionManaged: {}", isSparkSessionManaged); + + final String inputPath = parser.get("inputPath"); + log.info("inputPath: {}", inputPath); + + final String outputPath = parser.get("outputPath"); + log.info("outputPath {}: ", outputPath); + + final SparkConf conf = new SparkConf(); + + runWithSparkSession(conf, isSparkSessionManaged, spark -> { + removeOutputDir(spark, outputPath); + processRAiDEntities(spark, inputPath, outputPath); + }); + } + + private static void removeOutputDir(final SparkSession spark, final String path) { + HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration()); + } + + static void processRAiDEntities(final SparkSession spark, + final String inputPath, + final String outputPath) { + readInputPath(spark, inputPath) + .map(GenerateRAiDActionSetJob::prepareRAiD) + .flatMap(List::iterator) + .mapToPair( + aa -> new Tuple2<>(new Text(aa.getClazz().getCanonicalName()), + new Text(OBJECT_MAPPER.writeValueAsString(aa)))) + .saveAsHadoopFile(outputPath, Text.class, Text.class, SequenceFileOutputFormat.class); + + } + + protected static List> prepareRAiD(final RAiDEntity r) { + + final Date now = new Date(); + final OtherResearchProduct orp = new OtherResearchProduct(); + final List> res = new ArrayList<>(); + String raidId = calculateOpenaireId(r.getRaid()); + + orp.setId(raidId); + orp.setCollectedfrom(RAID_COLLECTED_FROM); + orp.setDataInfo(RAID_DATA_INFO); + orp.setResourcetype(RAID_QUALIFIER); + orp.setTitle( + Collections.singletonList( + 
structuredProperty( + r.getTitle(), + qualifier("main title", "main title", DNET_DATACITE_TITLE, DNET_DATACITE_TITLE), + RAID_DATA_INFO)) + ); + orp.setDescription(listFields(RAID_DATA_INFO, r.getSummary())); + orp.setAuthor(createAuthors(r.getAuthors())); + orp.setInstance(Collections.singletonList(eu.dnetlib.dhp.actionmanager.Constants.getInstance(RAID_QUALIFIER))); + orp.setSubject( + r.getSubjects() + .stream() + .map(s -> subject(s, qualifier(DNET_SUBJECT_KEYWORD, DNET_SUBJECT_KEYWORD, DNET_SUBJECT_TYPOLOGIES, DNET_SUBJECT_TYPOLOGIES), RAID_DATA_INFO)) + .collect(Collectors.toList()) + ); + orp.setRelevantdate( + Arrays.asList( + structuredProperty(r.getEndDate(), qualifier("endDate","endDate", DNET_DATACITE_DATE, DNET_DATACITE_DATE), RAID_DATA_INFO), + structuredProperty(r.getStartDate(), qualifier("startDate", "startDate", DNET_DATACITE_DATE, DNET_DATACITE_DATE), RAID_DATA_INFO) + ) + ); + orp.setLastupdatetimestamp(now.getTime()); + orp.setDateofcollection(r.getStartDate()); + + res.add(new AtomicAction<>(OtherResearchProduct.class, orp)); + + for(String resultId: r.getIds()) { + Relation rel1 = OafMapperUtils.getRelation( + raidId, + resultId, + ModelConstants.RESULT_RESULT, + ModelConstants.OUTCOME, + PART, + RAID_COLLECTED_FROM, + RAID_DATA_INFO, + now.getTime(), + null, + null + ); + Relation rel2 = OafMapperUtils.getRelation( + resultId, + raidId, + ModelConstants.RESULT_RESULT, + ModelConstants.OUTCOME, + IS_PART_OF, + RAID_COLLECTED_FROM, + RAID_DATA_INFO, + now.getTime(), + null, + null + ); + res.add(new AtomicAction<>(Relation.class, rel1)); + res.add(new AtomicAction<>(Relation.class, rel2)); + } + + return res; + } + + public static String calculateOpenaireId(final String raid) { + return String.format("50|%s::%s", Constants.RAID_NS_PREFIX, DHPUtils.md5(raid)); + } + + public static List createAuthors(final List author) { + return author.stream().map(s-> { + Author a = new Author(); + a.setFullname(s); + return a; + }).collect(Collectors.toList()); + } + + private static JavaRDD readInputPath( + final SparkSession spark, + final String path) { + + return spark + .read() + .json(path) + .as(Encoders.bean(RAiDEntity.class)) + .toJavaRDD(); + + } + +} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/raid/model/GenerateRAiDActionSetJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/raid/model/GenerateRAiDActionSetJob.java new file mode 100644 index 000000000..b0aec71d3 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/raid/model/GenerateRAiDActionSetJob.java @@ -0,0 +1,2 @@ +package eu.dnetlib.dhp.actionmanager.raid.model;public class GenerateRAiDActionSetJob { +} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/raid/model/RAiDEntity.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/raid/model/RAiDEntity.java new file mode 100644 index 000000000..bd7e28926 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/raid/model/RAiDEntity.java @@ -0,0 +1,102 @@ +package eu.dnetlib.dhp.actionmanager.raid.model; + +import java.io.Serializable; +import java.util.List; + +public class RAiDEntity implements Serializable { + + String raid; + List authors; + String startDate; + String endDate; + List subjects; + List titles; + List ids; + String title; + String summary; + + public RAiDEntity(){} + public RAiDEntity(String raid, List authors, String startDate, String 
endDate, List subjects, List titles, List ids, String title, String summary) { + this.raid = raid; + this.authors = authors; + this.startDate = startDate; + this.endDate = endDate; + this.subjects = subjects; + this.titles = titles; + this.ids = ids; + this.title = title; + this.summary = summary; + } + + public String getRaid() { + return raid; + } + + public void setRaid(String raid) { + this.raid = raid; + } + + public List getAuthors() { + return authors; + } + + public void setAuthors(List authors) { + this.authors = authors; + } + + public String getStartDate() { + return startDate; + } + + public void setStartDate(String startDate) { + this.startDate = startDate; + } + + public String getEndDate() { + return endDate; + } + + public void setEndDate(String endDate) { + this.endDate = endDate; + } + + public List getSubjects() { + return subjects; + } + + public void setSubjects(List subjects) { + this.subjects = subjects; + } + + public List getTitles() { + return titles; + } + + public void setTitles(List titles) { + this.titles = titles; + } + + public List getIds() { + return ids; + } + + public void setIds(List ids) { + this.ids = ids; + } + + public String getTitle() { + return title; + } + + public void setTitle(String title) { + this.title = title; + } + + public String getSummary() { + return summary; + } + + public void setSummary(String summary) { + this.summary = summary; + } +} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/GenerateRorActionSetJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/GenerateRorActionSetJob.java index 5f3493d56..ce1973a7f 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/GenerateRorActionSetJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/GenerateRorActionSetJob.java @@ -21,6 +21,7 @@ import java.util.Optional; import java.util.Set; import java.util.stream.Collectors; +import eu.dnetlib.dhp.schema.oaf.*; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.conf.Configuration; @@ -44,13 +45,6 @@ import eu.dnetlib.dhp.common.Constants; import eu.dnetlib.dhp.common.HdfsSupport; import eu.dnetlib.dhp.schema.action.AtomicAction; import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.oaf.DataInfo; -import eu.dnetlib.dhp.schema.oaf.Field; -import eu.dnetlib.dhp.schema.oaf.KeyValue; -import eu.dnetlib.dhp.schema.oaf.Oaf; -import eu.dnetlib.dhp.schema.oaf.Organization; -import eu.dnetlib.dhp.schema.oaf.Qualifier; -import eu.dnetlib.dhp.schema.oaf.StructuredProperty; import eu.dnetlib.dhp.utils.DHPUtils; import scala.Tuple2; diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/raid/action_set_parameters.json b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/raid/action_set_parameters.json new file mode 100644 index 000000000..2049630d2 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/raid/action_set_parameters.json @@ -0,0 +1,14 @@ +[ + { + "paramName": "i", + "paramLongName": "inputPath", + "paramDescription": "the path of the input json", + "paramRequired": true + }, + { + "paramName": "o", + "paramLongName": "outputPath", + "paramDescription": "the path of the new ActionSet", + "paramRequired": true + } +] \ No newline at end of file diff --git 
a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/raid/oozie_app/action_set_parameters.json b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/raid/oozie_app/action_set_parameters.json new file mode 100644 index 000000000..e69de29bb diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/raid/oozie_app/config-default.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/raid/oozie_app/config-default.xml new file mode 100644 index 000000000..a1755f329 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/raid/oozie_app/config-default.xml @@ -0,0 +1,58 @@ + + + jobTracker + yarnRM + + + nameNode + hdfs://nameservice1 + + + oozie.use.system.libpath + true + + + oozie.action.sharelib.for.spark + spark2 + + + hive_metastore_uris + thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 + + + spark2YarnHistoryServerAddress + http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089 + + + spark2ExtraListeners + com.cloudera.spark.lineage.NavigatorAppListener + + + spark2SqlQueryExecutionListeners + com.cloudera.spark.lineage.NavigatorQueryListener + + + oozie.launcher.mapreduce.user.classpath.first + true + + + sparkExecutorNumber + 4 + + + spark2EventLogDir + /user/spark/spark2ApplicationHistory + + + sparkDriverMemory + 15G + + + sparkExecutorMemory + 6G + + + sparkExecutorCores + 1 + + \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/raid/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/raid/oozie_app/workflow.xml new file mode 100644 index 000000000..9b5aa5905 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/raid/oozie_app/workflow.xml @@ -0,0 +1,55 @@ + + + + raidJsonInputPath + the path of the json + + + raidActionSetPath + path where to store the action set + + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + + + + + + + + + + + + yarn + cluster + ProcessRAiDFile + eu.dnetlib.dhp.actionmanager.raid.GenerateRAiDActionSetJob + dhp-aggregation-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.shuffle.partitions=3840 + + --inputPath${raidJsonInputPath} + --outputPath${raidActionSetPath} + + + + + + + diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/raid/GenerateRAiDActionSetJobTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/raid/GenerateRAiDActionSetJobTest.java new file mode 100644 index 000000000..1f33f45b2 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/raid/GenerateRAiDActionSetJobTest.java @@ -0,0 +1,112 @@ +package eu.dnetlib.dhp.actionmanager.raid; + +import eu.dnetlib.dhp.actionmanager.opencitations.CreateOpenCitationsASTest; +import eu.dnetlib.dhp.actionmanager.raid.model.RAiDEntity; +import eu.dnetlib.dhp.schema.action.AtomicAction; +import eu.dnetlib.dhp.schema.oaf.Oaf; +import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct; +import 
eu.dnetlib.dhp.schema.oaf.Relation; +import org.apache.commons.io.FileUtils; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaPairRDD; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.rdd.RDD; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.SparkSession; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Test; +import scala.Tuple2; + +import java.io.File; +import java.nio.file.Paths; +import java.util.Arrays; +import java.util.List; + +import static eu.dnetlib.dhp.actionmanager.Constants.OBJECT_MAPPER; +import static java.nio.file.Files.createTempDirectory; +import static org.junit.jupiter.api.Assertions.assertEquals; + +public class GenerateRAiDActionSetJobTest { + private static String input_path; + private static String output_path; + static SparkSession spark; + + @BeforeEach + void setUp() throws Exception { + + input_path = Paths + .get(GenerateRAiDActionSetJobTest.class.getResource("/eu/dnetlib/dhp/actionmanager/raid/raid_example.json").toURI()) + .toFile() + .getAbsolutePath(); + + output_path = createTempDirectory(GenerateRAiDActionSetJobTest.class.getSimpleName() + "-") + .toAbsolutePath() + .toString(); + + SparkConf conf = new SparkConf(); + conf.setAppName(GenerateRAiDActionSetJobTest.class.getSimpleName()); + + conf.setMaster("local[*]"); + conf.set("spark.driver.host", "localhost"); + conf.set("hive.metastore.local", "true"); + conf.set("spark.ui.enabled", "false"); + conf.set("spark.sql.warehouse.dir", output_path); + conf.set("hive.metastore.warehouse.dir", output_path); + + spark = SparkSession + .builder() + .appName(GenerateRAiDActionSetJobTest.class.getSimpleName()) + .config(conf) + .getOrCreate(); + } + + @AfterAll + static void cleanUp() throws Exception { + FileUtils.deleteDirectory(new File(output_path)); + } + + @Test + @Disabled + void testProcessRAiDEntities() { + GenerateRAiDActionSetJob.processRAiDEntities(spark, input_path, output_path + "/test_raid_action_set"); + + JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + JavaRDD result = sc + .sequenceFile(output_path + "/test_raid_action_set", Text.class, Text.class) + .map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class)) + .map(AtomicAction::getPayload); + + assertEquals(80, result.count()); + } + + @Test + void testPrepareRAiD() { + + List> atomicActions = GenerateRAiDActionSetJob.prepareRAiD(new RAiDEntity( + "-92190526", + Arrays.asList("Berli, Justin", "Le Mao, Bérénice", "Guillaume Touya", "Wenclik, Laura", "Courtial, Azelle", "Muehlenhaus, Ian", "Justin Berli", "Touya, Guillaume", "Gruget, Maïeul", "Azelle Courtial", "Ian Muhlenhaus", "Maïeul Gruget", "Marion Dumont", "Maïeul GRUGET", "Cécile Duchêne"), + "2021-09-10", + "2024-02-16", + Arrays.asList("cartography, zoom, pan, desert fog", "Road network", "zooming", "Pan-scalar maps", "pan-scalar map", "Python library", "QGIS", "map design", "landmarks", "Cartes transscalaires", "anchor", "disorientation", "[INFO]Computer Science [cs]", "[SHS.GEO]Humanities and Social Sciences/Geography", "cognitive cartography", "eye-tracking", "Computers in Earth Sciences", "Topographic map", "National Mapping Agency", "General Medicine", "Geography, Planning and Development", "multi-scales", 
"pan-scalar maps", "Selection", "cartography", "General Earth and Planetary Sciences", "progressiveness", "map generalisation", "Eye-tracker", "zoom", "algorithms", "Map Design", "cartography, map generalisation, zoom, multi-scale map", "Interactive maps", "Map generalisation", "Earth and Planetary Sciences (miscellaneous)", "Cartographic generalization", "rivers", "Benchmark", "General Environmental Science", "open source", "drawing", "Constraint", "Multi-scale maps"), + Arrays.asList("Where do people look at during multi-scale map tasks?", "FogDetector survey raw data", "Collection of cartographic disorientation stories", "Anchorwhat dataset", "BasqueRoads: A Benchmark for Road Network Selection", "Progressive river network selection for pan-scalar maps", "BasqueRoads, a dataset to benchmark road selection algorithms", "Missing the city for buildings? A critical review of pan-scalar map generalization and design in contemporary zoomable maps", "Empirical approach to advance the generalisation of multi-scale maps", "L'Alpe d'Huez: a dataset to benchmark topographic map generalisation", "eye-tracking data from a survey on zooming in a pan-scalar map", "Material of the experiment 'More is Less' from the MapMuxing project", "Cartagen4py, an open source Python library for map generalisation", "L’Alpe d’Huez: A Benchmark for Topographic Map Generalisation"), + Arrays.asList("50|doi_dedup___::6915135e0aa39f913394513f809ae58a", "50|doi_dedup___::754e3c283639bc6e104c925ff3e34007", "50|doi_dedup___::13517477f3c1261d57a3364363ce6ce0", "50|doi_dedup___::675b16c73accc4e7242bbb4ed9b3724a", "50|doi_dedup___::94ce09906b2d7d37eb2206cea8a50153", "50|dedup_wf_002::cc575d5ca5651ff8c3029a3a76e7e70a", "50|doi_dedup___::c5e52baddda17c755d1bae012a97dc13", "50|doi_dedup___::4f5f38c9e08fe995f7278963183f8ad4", "50|doi_dedup___::a9bc4453273b2d02648a5cb453195042", "50|doi_dedup___::5e893dc0cb7624a33f41c9b428bd59f7", "50|doi_dedup___::c1ecdef48fd9be811a291deed950e1c5", "50|doi_dedup___::9e93c8f2d97c35de8a6a57a5b53ef283", "50|dedup_wf_002::d08be0ed27b13d8a880e891e08d093ea", "50|doi_dedup___::f8d8b3b9eddeca2fc0e3bc9e63996555"), + "Exploring Multi-Scale Map Generalization and Design", + "This project aims to advance the generalization of multi-scale maps by investigating the impact of different design elements on user experience. The research involves collecting and analyzing data from various sources, including surveys, eye-tracking studies, and user experiments. The goal is to identify best practices for map generalization and design, with a focus on reducing disorientation and improving information retrieval during exploration. The project has led to the development of several datasets, including BasqueRoads, AnchorWhat, and L'Alpe d'Huez, which can be used to benchmark road selection algorithms and topographic map generalization techniques. The research has also resulted in the creation of a Python library, Cartagen4py, for map generalization. The findings of this project have the potential to improve the design and usability of multi-scale maps, making them more effective tools for navigation and information retrieval." 
+ )); + + OtherResearchProduct orp = (OtherResearchProduct) atomicActions.get(0).getPayload(); + Relation rel = (Relation) atomicActions.get(1).getPayload(); + + assertEquals("Exploring Multi-Scale Map Generalization and Design", orp.getTitle().get(0).getValue()); + assertEquals("50|raid________::759a564ce5cc7360cab030c517c7366b", rel.getSource()); + assertEquals("50|doi_dedup___::6915135e0aa39f913394513f809ae58a", rel.getTarget()); + + } + +} diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/raid/raid_example.json b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/raid/raid_example.json new file mode 100644 index 000000000..7694b605c --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/raid/raid_example.json @@ -0,0 +1,6 @@ +{"raid": "-9222092103004099540", "authors": ["Department of Archaeology & Museums", "Department of Archaeology and Museums", "Department Of Archaeology & Museums"], "subjects": ["Begamganj", "Raisen", "Bhopal", "Budhni", "Malwa site survey", "सीहोर", "Gauharganj", "बुधनी", "Budni", "Berasia"], "titles": ["Malwa site survey : Raisen District, Begamganj Tahsīl, photographic documentation", "Malwa site survey : Bhopal District, photographic documentation (version 1, TIFF files)", "Malwa site survey : Raisen District, Gauharganj Tahsīl, village finds", "Malwa site survey : Sehore सीहोर District, Budni Tahsīl, photographic documentation (part 1)", "Malwa site survey: Bhopal District, Berasia Tahsīl, photographic documentation (with villages named)", "Malwa site survey : Sehore सीहोर District, Budni Tahsīl, photographic documentation (part 2)", "Malwa site survey : Bhopal District, photographic documentation (version 2, JPEG files)"], "ids": ["50|doi_dedup___::7523d165970830dd857e6cbea4302adf", "50|doi_dedup___::02309ae8a9fae291df321e317f5c5330", "50|doi_dedup___::95347ba2c4264414fab39712ee7fe481", "50|doi_dedup___::970aa708fe667596754fd02a708780f5", "50|doi_dedup___::b7cd9128cc53b1257a4f000347f339b0", "50|doi_dedup___::c7d65da0ecedef4d2c702b9db197d90c", "50|doi_dedup___::addbb67cf5046e340f342ba091bcebfa"], "title": "Documentation of Malwa Region", "summary": "This project involves the documentation of the Malwa region through photographic surveys. The surveys were conducted by the Department of Archaeology and Museums, Madhya Pradesh, and cover various districts and tahsils. The documentation includes photographic records of sites, villages, and other relevant features. The project aims to provide a comprehensive understanding of the region's cultural and historical significance.", "startDate": "2019-03-06", "endDate": "2019-03-08"} +{"raid": "-9221424331076109424", "authors": ["Hutchings, Judy", "Ward, Catherine", "Baban, Adriana", "Dănilă, Ingrid", "Frantz, Inga", "Gardner, Frances", "Lachman, Jamie", "Lachman, Jamie M.", "Foran, Heather", "Heinrichs, Nina", "Murphy, Hugh", "Băban, Adriana", "Raleva, Marija", "Fang, Xiangming", "Jansen, Elena", "Taut, Diana", "Foran, Heather M.", "Tăut, Diana", "Ward, Catherine L.", "Williams, Margiad", "Lesco, Galina", "Brühl, Antonia"], "subjects": ["3. Good health", "5. Gender equality", "Criminology not elsewhere classified", "1. No poverty", "2.
Zero hunger"], "titles": ["sj-docx-1-vaw-10.1177_10778012231188090 - Supplemental material for Co-Occurrence of Intimate Partner Violence Against Mothers and Maltreatment of Their Children With Behavioral Problems in Eastern Europe", "Hunger in vulnerable families in Southeastern Europe: Associations with health and violence", "Prevention of child mental health problems through parenting interventions in Southeastern Europe (RISE): study protocol for a multi-site randomised controlled trial"], "ids": ["50|doi_dedup___::a70015063e5400dae2e097ee10b4a589", "50|doi_dedup___::6e1d12026fcde9087724622ccdeed430", "50|doi_dedup___::5b7bd5d46c5d95e2ef5b36663504a67e"], "title": "Exploring the Impact of Hunger and Violence on Child Health in Southeastern Europe", "summary": "This study aims to investigate the relationship between hunger, violence, and child health in vulnerable families in Southeastern Europe. The research will explore the experiences of families in FYR Macedonia, Republic of Moldova, and Romania, and examine the associations between hunger, maltreatment, and other health indicators. The study will also test the efficacy of a parenting intervention targeting child behavioral problems in alleviating these issues. The findings of this research will contribute to the development of effective interventions to address the complex needs of vulnerable families in the region.", "startDate": "2019-06-04", "endDate": "2023-01-01"} +{"raid": "-9219052635741785098", "authors": ["Berli, Justin", "Le Mao, Bérénice", "Guillaume Touya", "Wenclik, Laura", "Courtial, Azelle", "Muehlenhaus, Ian", "Justin Berli", "Touya, Guillaume", "Gruget, Maïeul", "Azelle Courtial", "Ian Muhlenhaus", "Maïeul Gruget", "Marion Dumont", "Maïeul GRUGET", "Cécile Duchêne"], "subjects": ["cartography, zoom, pan, desert fog", "Road network", "zooming", "Pan-scalar maps", "pan-scalar map", "Python library", "QGIS", "map design", "landmarks", "Cartes transscalaires", "anchor", "disorientation", "[INFO]Computer Science [cs]", "[SHS.GEO]Humanities and Social Sciences/Geography", "cognitive cartography", "eye-tracking", "Computers in Earth Sciences", "Topographic map", "National Mapping Agency", "General Medicine", "Geography, Planning and Development", "multi-scales", "pan-scalar maps", "Selection", "cartography", "General Earth and Planetary Sciences", "progressiveness", "map generalisation", "Eye-tracker", "zoom", "algorithms", "Map Design", "cartography, map generalisation, zoom, multi-scale map", "Interactive maps", "Map generalisation", "Earth and Planetary Sciences (miscellaneous)", "Cartographic generalization", "rivers", "Benchmark", "General Environmental Science", "open source", "drawing", "Constraint", "Multi-scale maps"], "titles": ["Where do people look at during multi-scale map tasks?", "FogDetector survey raw data", "Collection of cartographic disorientation stories", "Anchorwhat dataset", "BasqueRoads: A Benchmark for Road Network Selection", "Progressive river network selection for pan-scalar maps", "BasqueRoads, a dataset to benchmark road selection algorithms", "Missing the city for buildings? 
A critical review of pan-scalar map generalization and design in contemporary zoomable maps", "Empirical approach to advance the generalisation of multi-scale maps", "L'Alpe d'Huez: a dataset to benchmark topographic map generalisation", "eye-tracking data from a survey on zooming in a pan-scalar map", "Material of the experiment \"More is Less\" from the MapMuxing project", "Cartagen4py, an open source Python library for map generalisation", "L’Alpe d’Huez: A Benchmark for Topographic Map Generalisation"], "ids": ["50|doi_dedup___::6915135e0aa39f913394513f809ae58a", "50|doi_dedup___::754e3c283639bc6e104c925ff3e34007", "50|doi_dedup___::13517477f3c1261d57a3364363ce6ce0", "50|doi_dedup___::675b16c73accc4e7242bbb4ed9b3724a", "50|doi_dedup___::94ce09906b2d7d37eb2206cea8a50153", "50|dedup_wf_002::cc575d5ca5651ff8c3029a3a76e7e70a", "50|doi_dedup___::c5e52baddda17c755d1bae012a97dc13", "50|doi_dedup___::4f5f38c9e08fe995f7278963183f8ad4", "50|doi_dedup___::a9bc4453273b2d02648a5cb453195042", "50|doi_dedup___::5e893dc0cb7624a33f41c9b428bd59f7", "50|doi_dedup___::c1ecdef48fd9be811a291deed950e1c5", "50|doi_dedup___::9e93c8f2d97c35de8a6a57a5b53ef283", "50|dedup_wf_002::d08be0ed27b13d8a880e891e08d093ea", "50|doi_dedup___::f8d8b3b9eddeca2fc0e3bc9e63996555"], "title": "Exploring Multi-Scale Map Generalization and Design", "summary": "This project aims to advance the generalization of multi-scale maps by investigating the impact of different design elements on user experience. The research involves collecting and analyzing data from various sources, including surveys, eye-tracking studies, and user experiments. The goal is to identify best practices for map generalization and design, with a focus on reducing disorientation and improving information retrieval during exploration. The project has led to the development of several datasets, including BasqueRoads, AnchorWhat, and L'Alpe d'Huez, which can be used to benchmark road selection algorithms and topographic map generalization techniques. The research has also resulted in the creation of a Python library, Cartagen4py, for map generalization. The findings of this project have the potential to improve the design and usability of multi-scale maps, making them more effective tools for navigation and information retrieval.", "startDate": "2021-09-10", "endDate": "2024-02-16"} +{"raid": "-9216828847055450272", "authors": ["Grey, Alan", "Gorelov, Sergey", "Pall, Szilard", "Merz, Pascal", "Justin A., Lemkul", "Szilárd Páll", "Pasquadibisceglie, Andrea", "Kutzner, Carsten", "Schulz, Roland", "Nabet, Julien", "Abraham, Mark", "Jalalypour, Farzaneh", "Lundborg, Magnus", "Gray, Alan", "Villa, Alessandra", "Berk Hess", "Santuz, Hubert", "Irrgang, M. 
Eric", "Wingbermuehle, Sebastian", "Lemkul, Justin A.", "Jordan, Joe", "Pellegrino, Michele", "Doijade, Mahesh", "Shvetsov, Alexey", "Hess, Berk", "Behera, Sudarshan", "Andrey Alekseenko", "Shugaeva, Tatiana", "Fleischmann, Stefan", "Bergh, Cathrine", "Morozov, Dmitry", "Adam Hospital", "Briand, Eliane", "Lindahl, Erik", "Brown, Ania", "Marta Lloret Llinares", "Miletic, Vedran", "Alekseenko, Andrey", "Gouaillardet, Gilles", "Fiorin, Giacomo", "Basov, Vladimir"], "subjects": ["webinar"], "titles": ["Introduction to HPC: molecular dynamics simulations with GROMACS: log files", "BioExcel webinar #73: Competency frameworks to support training design and professional development", "Introduction to HPC: molecular dynamics simulations with GROMACS: output files - Devana", "GROMACS 2024.0 Manual", "BioExcel Webinar #71: GROMACS-PMX for accurate estimation of free energy differences", "Introduction to HPC: molecular dynamics simulations with GROMACS: input files", "BioExcel Webinar #68: What's new in GROMACS 2023", "BioExcel Webinar #69: BioBB-Wfs and BioBB-API, integrated web-based platform and programmatic interface for biomolecular simulations workflows using the BioExcel Building Blocks library", "GROMACS 2024-beta Source code"], "ids": ["50|doi_dedup___::8318fbc815ee1943c3269be7567f220b", "50|doi_dedup___::9530e03fb2aac63e82b18a40dc09e32c", "50|doi_dedup___::30174ab31075e76a428ca5b4f4d236b8", "50|doi_________::70b7c6dce09ae6f1361d22913fdf95eb", "50|doi_dedup___::337dd48600618f3c06257edd750d6201", "50|doi_dedup___::d622992ba9077617f37ebd268b3e806d", "50|doi_dedup___::0b0bcc6825d6c052c37882fd5cfc1e8c", "50|doi_dedup___::4b1541a7cee32527c65ace5d1ed57335", "50|doi_dedup___::1379861df59bd755e4fb39b9f95ffbd3"], "title": "Exploring High-Performance Computing and Biomolecular Simulations", "summary": "This project involves exploring high-performance computing (HPC) and biomolecular simulations using GROMACS. The objectives include understanding molecular dynamics simulations, log files, input files, and output files. Additionally, the project aims to explore competency frameworks for professional development, specifically in the field of computational biomolecular research. The tools and techniques used will include GROMACS, BioExcel Building Blocks, and competency frameworks. The expected outcomes include a deeper understanding of HPC and biomolecular simulations, as well as the development of skills in using GROMACS and BioExcel Building Blocks. The project will also contribute to the development of competency frameworks for professional development in the field of computational biomolecular research.", "startDate": "2023-04-25", "endDate": "2024-01-30"} +{"raid": "-9210544816395499758", "authors": ["Bateson, Melissa", "Andrews, Clare", "Verhulst, Simon", "Nettle, Daniel", "Zuidersma, Erica"], "subjects": ["2. Zero hunger"], "titles": ["Exposure to food insecurity increases energy storage and reduces somatic maintenance in European starlings", "Data and code archive for Andrews et al. 'Exposure to food insecurity increases energy storage and reduces somatic maintenance in European starlings'"], "ids": ["50|doi_dedup___::176117239be06189523c253e0ca9c5ec", "50|doi_dedup___::343e0b0ddf0d54763a89a62af1f7a379"], "title": "Investigating the Effects of Food Insecurity on Energy Storage and Somatic Maintenance in European Starlings", "summary": "This study examines the impact of food insecurity on energy storage and somatic maintenance in European starlings. 
The research involved exposing juvenile starlings to either uninterrupted food availability or a regime of unpredictable food unavailability. The results show that birds exposed to food insecurity stored more energy, but at the expense of somatic maintenance and repair. The study provides insights into the adaptive responses of birds to food scarcity and the trade-offs involved in energy storage and maintenance.", "startDate": "2021-06-28", "endDate": "2021-06-28"} +{"raid": "-9208499171224730388", "authors": ["Maniati, Eleni", "Bakker, Bjorn", "McClelland, Sarah E.", "Shaikh, Nadeem", "De Angelis, Simone", "Johnson, Sarah C.", "Wang, Jun", "Foijer, Floris", "Spierings, Diana C. J.", "Boemo, Michael A.", "Wardenaar, René", "Mazzagatti, Alice"], "subjects": [], "titles": ["Additional file 2 of Replication stress generates distinctive landscapes of DNA copy number alterations and chromosome scale losses", "Additional file 5 of Replication stress generates distinctive landscapes of DNA copy number alterations and chromosome scale losses"], "ids": ["50|doi_dedup___::a1bfeb173971f74a274fab8bdd78a4bc", "50|doi_dedup___::3d6e151aaeb2f7c40a320207fdd80ade"], "title": "Analysis of DNA Copy Number Alterations and Chromosome Scale Losses", "summary": "This study analyzed the effects of replication stress on DNA copy number alterations and chromosome scale losses. The results show distinctive landscapes of these alterations and losses, which were further investigated in additional files. The study provides valuable insights into the mechanisms of replication stress and its impact on genomic stability.", "startDate": "2022-01-01", "endDate": "2022-01-01"} \ No newline at end of file From 6af3fd16b6362c4a9357f44ba779b7822f5b7033 Mon Sep 17 00:00:00 2001 From: miconis Date: Thu, 5 Dec 2024 14:39:42 +0100 Subject: [PATCH 46/73] attributes fixes --- .../java/eu/dnetlib/pace/tree/DateRange.java | 90 ++--- .../eu/dnetlib/pace/tree/JsonListMatch.java | 2 +- .../pace/comparators/ComparatorTest.java | 30 +- .../dnetlib/dhp/actionmanager/Constants.java | 4 +- .../raid/GenerateRAiDActionSetJob.java | 309 ++++++++++-------- .../raid/model/GenerateRAiDActionSetJob.java | 5 +- .../actionmanager/raid/model/RAiDEntity.java | 154 ++++----- .../ror/GenerateRorActionSetJob.java | 2 +- .../dhp/sx/bio/pubmed/PMAffiliation.java | 44 +-- .../dnetlib/dhp/sx/bio/pubmed/PMAuthor.java | 1 - .../dhp/sx/bio/pubmed/PMIdentifier.java | 74 ++--- .../raid/oozie_app/action_set_parameters.json | 0 .../actionmanager/raid/oozie_app/workflow.xml | 2 - .../collection/crossref/Crossref2Oaf.scala | 1 - .../dnetlib/dhp/sx/bio/pubmed/PMParser2.scala | 13 +- .../dhp/sx/bio/pubmed/PubMedToOaf.scala | 20 +- .../raid/GenerateRAiDActionSetJobTest.java | 203 +++++++----- .../dnetlib/dhp/sx/bio/BioScholixTest.scala | 1 - .../dhp/bulktag/community/ResultTagger.java | 4 +- .../dnetlib/dhp/oa/graph/raw/MappersTest.java | 3 +- .../model/ProvisionModelSupport.java | 15 +- 21 files changed, 546 insertions(+), 431 deletions(-) delete mode 100644 dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/raid/oozie_app/action_set_parameters.json diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/DateRange.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/DateRange.java index c913109a4..194677e6e 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/DateRange.java +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/DateRange.java @@ -1,10 +1,5 @@ -package eu.dnetlib.pace.tree; -import 
com.wcohen.ss.AbstractStringDistance;
-import eu.dnetlib.pace.config.Config;
-import eu.dnetlib.pace.tree.support.AbstractStringComparator;
-import eu.dnetlib.pace.tree.support.ComparatorClass;
-import org.joda.time.DateTime;
+package eu.dnetlib.pace.tree;

 import java.time.DateTimeException;
 import java.time.LocalDate;
@@ -13,55 +8,62 @@ import java.time.format.DateTimeFormatter;
 import java.util.Locale;
 import java.util.Map;

+import org.joda.time.DateTime;
+
+import com.wcohen.ss.AbstractStringDistance;
+
+import eu.dnetlib.pace.config.Config;
+import eu.dnetlib.pace.tree.support.AbstractStringComparator;
+import eu.dnetlib.pace.tree.support.ComparatorClass;
+
 @ComparatorClass("dateRange")
 public class DateRange extends AbstractStringComparator {

-	int YEAR_RANGE;
+	int YEAR_RANGE;

-	public DateRange(Map params) {
-		super(params, new com.wcohen.ss.JaroWinkler());
-		YEAR_RANGE = Integer.parseInt(params.getOrDefault("year_range", "3"));
-	}
+	public DateRange(Map params) {
+		super(params, new com.wcohen.ss.JaroWinkler());
+		YEAR_RANGE = Integer.parseInt(params.getOrDefault("year_range", "3"));
+	}

-	public DateRange(final double weight) {
-		super(weight, new com.wcohen.ss.JaroWinkler());
-	}
+	public DateRange(final double weight) {
+		super(weight, new com.wcohen.ss.JaroWinkler());
+	}

-	protected DateRange(final double weight, final AbstractStringDistance ssalgo) {
-		super(weight, ssalgo);
-	}
+	protected DateRange(final double weight, final AbstractStringDistance ssalgo) {
+		super(weight, ssalgo);
+	}

-	public static boolean isNumeric(String str) {
-		return str.matches("\\d+"); //match a number with optional '-' and decimal.
-	}
+	public static boolean isNumeric(String str) {
+		return str.matches("\\d+"); // matches digit-only strings (no sign, no decimal point)
+	}

-	@Override
-	public double distance(final String a, final String b, final Config conf) {
-		if (a.isEmpty() || b.isEmpty()) {
-			return -1.0; // return -1 if a field is missing
-		}
+	@Override
+	public double distance(final String a, final String b, final Config conf) {
+		if (a.isEmpty() || b.isEmpty()) {
+			return -1.0; // return -1 if a field is missing
+		}

-		try {
-			DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd", Locale.ENGLISH);
-			LocalDate d1 = LocalDate.parse(a, formatter);
-			LocalDate d2 = LocalDate.parse(b, formatter);
-			Period period = Period.between(d1, d2);
+		try {
+			DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd", Locale.ENGLISH);
+			LocalDate d1 = LocalDate.parse(a, formatter);
+			LocalDate d2 = LocalDate.parse(b, formatter);
+			Period period = Period.between(d1, d2);

-			return period.getYears() <= YEAR_RANGE? 1.0 : 0.0;
-		}
-		catch (DateTimeException e) {
-			return -1.0;
-		}
+			return period.getYears() <= YEAR_RANGE ? 
1.0 : 0.0; + } catch (DateTimeException e) { + return -1.0; + } - } + } - @Override - public double getWeight() { - return super.weight; - } + @Override + public double getWeight() { + return super.weight; + } - @Override - protected double normalize(final double d) { - return d; - } + @Override + protected double normalize(final double d) { + return d; + } } diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/JsonListMatch.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/JsonListMatch.java index e95d9206e..d9558df90 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/JsonListMatch.java +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/JsonListMatch.java @@ -62,7 +62,7 @@ public class JsonListMatch extends AbstractListComparator { Set types = Sets.intersection(typesA, typesB); - if (types.isEmpty()) // if no common type, it is impossible to compare + if (types.isEmpty()) // if no common type, it is impossible to compare return -1; ca = ca.stream().filter(s -> types.contains(s.split("::")[0])).collect(Collectors.toSet()); diff --git a/dhp-pace-core/src/test/java/eu/dnetlib/pace/comparators/ComparatorTest.java b/dhp-pace-core/src/test/java/eu/dnetlib/pace/comparators/ComparatorTest.java index 83539de4a..0abde84bc 100644 --- a/dhp-pace-core/src/test/java/eu/dnetlib/pace/comparators/ComparatorTest.java +++ b/dhp-pace-core/src/test/java/eu/dnetlib/pace/comparators/ComparatorTest.java @@ -72,14 +72,34 @@ public class ComparatorTest extends AbstractPaceTest { CodeMatch codeMatch = new CodeMatch(params); // names have different codes - assertEquals(0.0, codeMatch.distance("physical oceanography at ctd station june 1998 ev02a", "physical oceanography at ctd station june 1998 ir02", conf)); + assertEquals( + 0.0, + codeMatch + .distance( + "physical oceanography at ctd station june 1998 ev02a", + "physical oceanography at ctd station june 1998 ir02", conf)); // names have same code - assertEquals(1.0, codeMatch.distance("physical oceanography at ctd station june 1998 ev02a", "physical oceanography at ctd station june 1998 ev02a", conf)); + assertEquals( + 1.0, + codeMatch + .distance( + "physical oceanography at ctd station june 1998 ev02a", + "physical oceanography at ctd station june 1998 ev02a", conf)); // code is not in both names - assertEquals(-1, codeMatch.distance("physical oceanography at ctd station june 1998", "physical oceanography at ctd station june 1998 ev02a", conf)); - assertEquals(1.0, codeMatch.distance("physical oceanography at ctd station june 1998", "physical oceanography at ctd station june 1998", conf)); + assertEquals( + -1, + codeMatch + .distance( + "physical oceanography at ctd station june 1998", + "physical oceanography at ctd station june 1998 ev02a", conf)); + assertEquals( + 1.0, + codeMatch + .distance( + "physical oceanography at ctd station june 1998", "physical oceanography at ctd station june 1998", + conf)); } @Test @@ -275,7 +295,7 @@ public class ComparatorTest extends AbstractPaceTest { Arrays .asList( "{\"datainfo\":{\"deletedbyinference\":false,\"inferenceprovenance\":null,\"inferred\":false,\"invisible\":false,\"provenanceaction\":{\"classid\":\"sysimport:actionset\",\"classname\":\"Harvested\",\"schemeid\":\"dnet:provenanceActions\",\"schemename\":\"dnet:provenanceActions\"},\"trust\":\"0.9\"},\"qualifier\":{\"classid\":\"grid\",\"classname\":\"GRID Identifier\",\"schemeid\":\"dnet:pid_types\",\"schemename\":\"dnet:pid_types\"},\"value\":\"grid_1\"}", - 
"{\"datainfo\":{\"deletedbyinference\":false,\"inferenceprovenance\":null,\"inferred\":false,\"invisible\":false,\"provenanceaction\":{\"classid\":\"sysimport:actionset\",\"classname\":\"Harvested\",\"schemeid\":\"dnet:provenanceActions\",\"schemename\":\"dnet:provenanceActions\"},\"trust\":\"0.9\"},\"qualifier\":{\"classid\":\"ror\",\"classname\":\"Research Organization Registry\",\"schemeid\":\"dnet:pid_types\",\"schemename\":\"dnet:pid_types\"},\"value\":\"ror_1\"}"), + "{\"datainfo\":{\"deletedbyinference\":false,\"inferenceprovenance\":null,\"inferred\":false,\"invisible\":false,\"provenanceaction\":{\"classid\":\"sysimport:actionset\",\"classname\":\"Harvested\",\"schemeid\":\"dnet:provenanceActions\",\"schemename\":\"dnet:provenanceActions\"},\"trust\":\"0.9\"},\"qualifier\":{\"classid\":\"ror\",\"classname\":\"Research Organization Registry\",\"schemeid\":\"dnet:pid_types\",\"schemename\":\"dnet:pid_types\"},\"value\":\"ror_1\"}"), "authors"); List b = createFieldList( Arrays diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/Constants.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/Constants.java index 722415c2e..394cc22a3 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/Constants.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/Constants.java @@ -3,8 +3,6 @@ package eu.dnetlib.dhp.actionmanager; import java.util.Optional; -import eu.dnetlib.dhp.schema.oaf.Instance; -import eu.dnetlib.dhp.schema.oaf.Qualifier; import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; @@ -15,6 +13,8 @@ import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.common.HdfsSupport; import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.oaf.Instance; +import eu.dnetlib.dhp.schema.oaf.Qualifier; import eu.dnetlib.dhp.schema.oaf.StructuredProperty; import eu.dnetlib.dhp.schema.oaf.Subject; import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/raid/GenerateRAiDActionSetJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/raid/GenerateRAiDActionSetJob.java index 8e5e1bdcb..3b2405956 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/raid/GenerateRAiDActionSetJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/raid/GenerateRAiDActionSetJob.java @@ -1,15 +1,15 @@ + package eu.dnetlib.dhp.actionmanager.raid; -import com.fasterxml.jackson.databind.ObjectMapper; -import eu.dnetlib.dhp.actionmanager.raid.model.RAiDEntity; -import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.common.Constants; -import eu.dnetlib.dhp.common.HdfsSupport; -import eu.dnetlib.dhp.schema.action.AtomicAction; -import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.oaf.*; -import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; -import eu.dnetlib.dhp.utils.DHPUtils; +import static eu.dnetlib.dhp.actionmanager.personentity.ExtractPerson.OPENAIRE_DATASOURCE_ID; +import static eu.dnetlib.dhp.actionmanager.personentity.ExtractPerson.OPENAIRE_DATASOURCE_NAME; +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; +import static 
eu.dnetlib.dhp.schema.common.ModelConstants.*; +import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.*; + +import java.util.*; +import java.util.stream.Collectors; + import org.apache.commons.io.IOUtils; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapred.SequenceFileOutputFormat; @@ -19,172 +19,191 @@ import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SparkSession; import org.slf4j.Logger; import org.slf4j.LoggerFactory; + +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.actionmanager.raid.model.RAiDEntity; +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.common.Constants; +import eu.dnetlib.dhp.common.HdfsSupport; +import eu.dnetlib.dhp.schema.action.AtomicAction; +import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; +import eu.dnetlib.dhp.utils.DHPUtils; import scala.Tuple2; -import java.util.*; -import java.util.stream.Collectors; - -import static eu.dnetlib.dhp.actionmanager.personentity.ExtractPerson.OPENAIRE_DATASOURCE_ID; -import static eu.dnetlib.dhp.actionmanager.personentity.ExtractPerson.OPENAIRE_DATASOURCE_NAME; -import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; -import static eu.dnetlib.dhp.schema.common.ModelConstants.*; -import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.*; - public class GenerateRAiDActionSetJob { - private static final Logger log = LoggerFactory.getLogger(eu.dnetlib.dhp.actionmanager.raid.GenerateRAiDActionSetJob.class); + private static final Logger log = LoggerFactory + .getLogger(eu.dnetlib.dhp.actionmanager.raid.GenerateRAiDActionSetJob.class); - private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - private static final List RAID_COLLECTED_FROM = listKeyValues( - OPENAIRE_DATASOURCE_ID, OPENAIRE_DATASOURCE_NAME); + private static final List RAID_COLLECTED_FROM = listKeyValues( + OPENAIRE_DATASOURCE_ID, OPENAIRE_DATASOURCE_NAME); - private static final Qualifier RAID_QUALIFIER = qualifier("raid:openaireinference", "raid:openaireinference", DNET_PROVENANCE_ACTIONS, DNET_PROVENANCE_ACTIONS); + private static final Qualifier RAID_QUALIFIER = qualifier("0049", "Research Activity Identifier", DNET_PUBLICATION_RESOURCE, DNET_PUBLICATION_RESOURCE); - private static final DataInfo RAID_DATA_INFO = dataInfo( - false, OPENAIRE_DATASOURCE_NAME, true, false, RAID_QUALIFIER, "0.92"); + private static final Qualifier RAID_INFERENCE_QUALIFIER = qualifier( + "raid:openaireinference", "Inferred by OpenAIRE", DNET_PROVENANCE_ACTIONS, DNET_PROVENANCE_ACTIONS); - public static void main(final String[] args) throws Exception { + private static final DataInfo RAID_DATA_INFO = dataInfo( + false, OPENAIRE_DATASOURCE_NAME, true, false, RAID_INFERENCE_QUALIFIER, "0.92"); - final String jsonConfiguration = IOUtils - .toString( - eu.dnetlib.dhp.actionmanager.raid.GenerateRAiDActionSetJob.class - .getResourceAsStream("/eu/dnetlib/dhp/actionmanager/raid/action_set_parameters.json")); + public static void main(final String[] args) throws Exception { - final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); + final String jsonConfiguration = IOUtils + .toString( + eu.dnetlib.dhp.actionmanager.raid.GenerateRAiDActionSetJob.class + .getResourceAsStream("/eu/dnetlib/dhp/actionmanager/raid/action_set_parameters.json")); - 
parser.parseArgument(args); + final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); - final Boolean isSparkSessionManaged = Optional - .ofNullable(parser.get("isSparkSessionManaged")) - .map(Boolean::valueOf) - .orElse(Boolean.TRUE); + parser.parseArgument(args); - log.info("isSparkSessionManaged: {}", isSparkSessionManaged); + final Boolean isSparkSessionManaged = Optional + .ofNullable(parser.get("isSparkSessionManaged")) + .map(Boolean::valueOf) + .orElse(Boolean.TRUE); - final String inputPath = parser.get("inputPath"); - log.info("inputPath: {}", inputPath); + log.info("isSparkSessionManaged: {}", isSparkSessionManaged); - final String outputPath = parser.get("outputPath"); - log.info("outputPath {}: ", outputPath); + final String inputPath = parser.get("inputPath"); + log.info("inputPath: {}", inputPath); - final SparkConf conf = new SparkConf(); + final String outputPath = parser.get("outputPath"); + log.info("outputPath {}: ", outputPath); - runWithSparkSession(conf, isSparkSessionManaged, spark -> { - removeOutputDir(spark, outputPath); - processRAiDEntities(spark, inputPath, outputPath); - }); - } + final SparkConf conf = new SparkConf(); - private static void removeOutputDir(final SparkSession spark, final String path) { - HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration()); - } + runWithSparkSession(conf, isSparkSessionManaged, spark -> { + removeOutputDir(spark, outputPath); + processRAiDEntities(spark, inputPath, outputPath); + }); + } - static void processRAiDEntities(final SparkSession spark, - final String inputPath, - final String outputPath) { - readInputPath(spark, inputPath) - .map(GenerateRAiDActionSetJob::prepareRAiD) - .flatMap(List::iterator) - .mapToPair( - aa -> new Tuple2<>(new Text(aa.getClazz().getCanonicalName()), - new Text(OBJECT_MAPPER.writeValueAsString(aa)))) - .saveAsHadoopFile(outputPath, Text.class, Text.class, SequenceFileOutputFormat.class); + private static void removeOutputDir(final SparkSession spark, final String path) { + HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration()); + } - } + static void processRAiDEntities(final SparkSession spark, + final String inputPath, + final String outputPath) { + readInputPath(spark, inputPath) + .map(GenerateRAiDActionSetJob::prepareRAiD) + .flatMap(List::iterator) + .mapToPair( + aa -> new Tuple2<>(new Text(aa.getClazz().getCanonicalName()), + new Text(OBJECT_MAPPER.writeValueAsString(aa)))) + .saveAsHadoopFile(outputPath, Text.class, Text.class, SequenceFileOutputFormat.class); - protected static List> prepareRAiD(final RAiDEntity r) { + } - final Date now = new Date(); - final OtherResearchProduct orp = new OtherResearchProduct(); - final List> res = new ArrayList<>(); - String raidId = calculateOpenaireId(r.getRaid()); + protected static List> prepareRAiD(final RAiDEntity r) { - orp.setId(raidId); - orp.setCollectedfrom(RAID_COLLECTED_FROM); - orp.setDataInfo(RAID_DATA_INFO); - orp.setResourcetype(RAID_QUALIFIER); - orp.setTitle( - Collections.singletonList( - structuredProperty( - r.getTitle(), - qualifier("main title", "main title", DNET_DATACITE_TITLE, DNET_DATACITE_TITLE), - RAID_DATA_INFO)) - ); - orp.setDescription(listFields(RAID_DATA_INFO, r.getSummary())); - orp.setAuthor(createAuthors(r.getAuthors())); - orp.setInstance(Collections.singletonList(eu.dnetlib.dhp.actionmanager.Constants.getInstance(RAID_QUALIFIER))); - orp.setSubject( - r.getSubjects() - .stream() - .map(s -> subject(s, qualifier(DNET_SUBJECT_KEYWORD, 
DNET_SUBJECT_KEYWORD, DNET_SUBJECT_TYPOLOGIES, DNET_SUBJECT_TYPOLOGIES), RAID_DATA_INFO)) - .collect(Collectors.toList()) - ); - orp.setRelevantdate( - Arrays.asList( - structuredProperty(r.getEndDate(), qualifier("endDate","endDate", DNET_DATACITE_DATE, DNET_DATACITE_DATE), RAID_DATA_INFO), - structuredProperty(r.getStartDate(), qualifier("startDate", "startDate", DNET_DATACITE_DATE, DNET_DATACITE_DATE), RAID_DATA_INFO) - ) - ); - orp.setLastupdatetimestamp(now.getTime()); - orp.setDateofcollection(r.getStartDate()); + final Date now = new Date(); + final OtherResearchProduct orp = new OtherResearchProduct(); + final List> res = new ArrayList<>(); + String raidId = calculateOpenaireId(r.getRaid()); - res.add(new AtomicAction<>(OtherResearchProduct.class, orp)); + orp.setId(raidId); + orp.setCollectedfrom(RAID_COLLECTED_FROM); + orp.setDataInfo(RAID_DATA_INFO); + orp + .setTitle( + Collections + .singletonList( + structuredProperty( + r.getTitle(), + qualifier("main title", "main title", DNET_DATACITE_TITLE, DNET_DATACITE_TITLE), + RAID_DATA_INFO))); + orp.setDescription(listFields(RAID_DATA_INFO, r.getSummary())); +// orp.setAuthor(createAuthors(r.getAuthors())); + orp.setInstance(Collections.singletonList(eu.dnetlib.dhp.actionmanager.Constants.getInstance(RAID_QUALIFIER))); + orp + .setSubject( + r + .getSubjects() + .stream() + .map( + s -> subject( + s, + qualifier( + DNET_SUBJECT_KEYWORD, DNET_SUBJECT_KEYWORD, DNET_SUBJECT_TYPOLOGIES, + DNET_SUBJECT_TYPOLOGIES), + RAID_DATA_INFO)) + .collect(Collectors.toList())); + orp + .setRelevantdate( + Arrays + .asList( + structuredProperty( + r.getEndDate(), qualifier("endDate", "endDate", DNET_DATACITE_DATE, DNET_DATACITE_DATE), + RAID_DATA_INFO), + structuredProperty( + r.getStartDate(), + qualifier("startDate", "startDate", DNET_DATACITE_DATE, DNET_DATACITE_DATE), + RAID_DATA_INFO))); + orp.setLastupdatetimestamp(now.getTime()); + orp.setDateofacceptance(field(r.getStartDate(), RAID_DATA_INFO)); - for(String resultId: r.getIds()) { - Relation rel1 = OafMapperUtils.getRelation( - raidId, - resultId, - ModelConstants.RESULT_RESULT, - ModelConstants.OUTCOME, - PART, - RAID_COLLECTED_FROM, - RAID_DATA_INFO, - now.getTime(), - null, - null - ); - Relation rel2 = OafMapperUtils.getRelation( - resultId, - raidId, - ModelConstants.RESULT_RESULT, - ModelConstants.OUTCOME, - IS_PART_OF, - RAID_COLLECTED_FROM, - RAID_DATA_INFO, - now.getTime(), - null, - null - ); - res.add(new AtomicAction<>(Relation.class, rel1)); - res.add(new AtomicAction<>(Relation.class, rel2)); - } + res.add(new AtomicAction<>(OtherResearchProduct.class, orp)); - return res; - } + for (String resultId : r.getIds()) { + Relation rel1 = OafMapperUtils + .getRelation( + raidId, + resultId, + ModelConstants.RESULT_RESULT, + PART, + HAS_PART, + RAID_COLLECTED_FROM, + RAID_DATA_INFO, + now.getTime(), + null, + null); + Relation rel2 = OafMapperUtils + .getRelation( + resultId, + raidId, + ModelConstants.RESULT_RESULT, + PART, + IS_PART_OF, + RAID_COLLECTED_FROM, + RAID_DATA_INFO, + now.getTime(), + null, + null); + res.add(new AtomicAction<>(Relation.class, rel1)); + res.add(new AtomicAction<>(Relation.class, rel2)); + } - public static String calculateOpenaireId(final String raid) { - return String.format("50|%s::%s", Constants.RAID_NS_PREFIX, DHPUtils.md5(raid)); - } + return res; + } - public static List createAuthors(final List author) { - return author.stream().map(s-> { - Author a = new Author(); - a.setFullname(s); - return a; - }).collect(Collectors.toList()); - } + public 
static String calculateOpenaireId(final String raid) { + return String.format("50|%s::%s", Constants.RAID_NS_PREFIX, DHPUtils.md5(raid)); + } - private static JavaRDD readInputPath( - final SparkSession spark, - final String path) { + public static List createAuthors(final List author) { + return author.stream().map(s -> { + Author a = new Author(); + a.setFullname(s); + return a; + }).collect(Collectors.toList()); + } - return spark - .read() - .json(path) - .as(Encoders.bean(RAiDEntity.class)) - .toJavaRDD(); + private static JavaRDD readInputPath( + final SparkSession spark, + final String path) { - } + return spark + .read() + .json(path) + .as(Encoders.bean(RAiDEntity.class)) + .toJavaRDD(); + + } } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/raid/model/GenerateRAiDActionSetJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/raid/model/GenerateRAiDActionSetJob.java index b0aec71d3..856b52e18 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/raid/model/GenerateRAiDActionSetJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/raid/model/GenerateRAiDActionSetJob.java @@ -1,2 +1,5 @@ -package eu.dnetlib.dhp.actionmanager.raid.model;public class GenerateRAiDActionSetJob { + +package eu.dnetlib.dhp.actionmanager.raid.model; + +public class GenerateRAiDActionSetJob { } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/raid/model/RAiDEntity.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/raid/model/RAiDEntity.java index bd7e28926..1203b28a7 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/raid/model/RAiDEntity.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/raid/model/RAiDEntity.java @@ -1,3 +1,4 @@ + package eu.dnetlib.dhp.actionmanager.raid.model; import java.io.Serializable; @@ -5,98 +6,101 @@ import java.util.List; public class RAiDEntity implements Serializable { - String raid; - List authors; - String startDate; - String endDate; - List subjects; - List titles; - List ids; - String title; - String summary; + String raid; + List authors; + String startDate; + String endDate; + List subjects; + List titles; + List ids; + String title; + String summary; - public RAiDEntity(){} - public RAiDEntity(String raid, List authors, String startDate, String endDate, List subjects, List titles, List ids, String title, String summary) { - this.raid = raid; - this.authors = authors; - this.startDate = startDate; - this.endDate = endDate; - this.subjects = subjects; - this.titles = titles; - this.ids = ids; - this.title = title; - this.summary = summary; - } + public RAiDEntity() { + } - public String getRaid() { - return raid; - } + public RAiDEntity(String raid, List authors, String startDate, String endDate, List subjects, + List titles, List ids, String title, String summary) { + this.raid = raid; + this.authors = authors; + this.startDate = startDate; + this.endDate = endDate; + this.subjects = subjects; + this.titles = titles; + this.ids = ids; + this.title = title; + this.summary = summary; + } - public void setRaid(String raid) { - this.raid = raid; - } + public String getRaid() { + return raid; + } - public List getAuthors() { - return authors; - } + public void setRaid(String raid) { + this.raid = raid; + } - public void setAuthors(List authors) { - this.authors = authors; - } + public List getAuthors() { + 
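
A note on the identifier scheme used above: calculateOpenaireId() derives the OtherResearchProduct id by hashing the RAiD value into a fixed namespace. A minimal sketch of the same construction, assuming DHPUtils.md5 yields a lowercase hex-encoded MD5 and that Constants.RAID_NS_PREFIX is the raid________ namespace seen in the test assertions further below (the md5 helper here is illustrative, not the project's own):

	import java.math.BigInteger;
	import java.security.MessageDigest;

	class RaidIdSketch {
		// stand-in for DHPUtils.md5: hex MD5, zero-padded to 32 chars
		static String md5(String s) throws Exception {
			byte[] digest = MessageDigest.getInstance("MD5").digest(s.getBytes("UTF-8"));
			return String.format("%032x", new BigInteger(1, digest));
		}

		public static void main(String[] args) throws Exception {
			// "50|" marks a result identifier, followed by the 12-character namespace prefix
			System.out.println(String.format("50|%s::%s", "raid________", md5("-92190526")));
			// the unit test below expects 50|raid________::759a564ce5cc7360cab030c517c7366b
		}
	}

Each linked result then gets a pair of relations, HAS_PART from the RAiD entity to the result and IS_PART_OF back from the result, so the grouping is navigable in both directions.
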
return authors; + } - public String getStartDate() { - return startDate; - } + public void setAuthors(List authors) { + this.authors = authors; + } - public void setStartDate(String startDate) { - this.startDate = startDate; - } + public String getStartDate() { + return startDate; + } - public String getEndDate() { - return endDate; - } + public void setStartDate(String startDate) { + this.startDate = startDate; + } - public void setEndDate(String endDate) { - this.endDate = endDate; - } + public String getEndDate() { + return endDate; + } - public List getSubjects() { - return subjects; - } + public void setEndDate(String endDate) { + this.endDate = endDate; + } - public void setSubjects(List subjects) { - this.subjects = subjects; - } + public List getSubjects() { + return subjects; + } - public List getTitles() { - return titles; - } + public void setSubjects(List subjects) { + this.subjects = subjects; + } - public void setTitles(List titles) { - this.titles = titles; - } + public List getTitles() { + return titles; + } - public List getIds() { - return ids; - } + public void setTitles(List titles) { + this.titles = titles; + } - public void setIds(List ids) { - this.ids = ids; - } + public List getIds() { + return ids; + } - public String getTitle() { - return title; - } + public void setIds(List ids) { + this.ids = ids; + } - public void setTitle(String title) { - this.title = title; - } + public String getTitle() { + return title; + } - public String getSummary() { - return summary; - } + public void setTitle(String title) { + this.title = title; + } - public void setSummary(String summary) { - this.summary = summary; - } + public String getSummary() { + return summary; + } + + public void setSummary(String summary) { + this.summary = summary; + } } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/GenerateRorActionSetJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/GenerateRorActionSetJob.java index ce1973a7f..6e8f48bda 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/GenerateRorActionSetJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/GenerateRorActionSetJob.java @@ -21,7 +21,6 @@ import java.util.Optional; import java.util.Set; import java.util.stream.Collectors; -import eu.dnetlib.dhp.schema.oaf.*; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.conf.Configuration; @@ -45,6 +44,7 @@ import eu.dnetlib.dhp.common.Constants; import eu.dnetlib.dhp.common.HdfsSupport; import eu.dnetlib.dhp.schema.action.AtomicAction; import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.utils.DHPUtils; import scala.Tuple2; diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMAffiliation.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMAffiliation.java index a8dacd132..5ac1920ea 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMAffiliation.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMAffiliation.java @@ -1,3 +1,4 @@ + package eu.dnetlib.dhp.sx.bio.pubmed; /** @@ -7,32 +8,33 @@ package eu.dnetlib.dhp.sx.bio.pubmed; */ public class PMAffiliation { - private String name; + private String name; - private PMIdentifier identifier; + private PMIdentifier identifier; - public 
PMAffiliation() { + public PMAffiliation() { - } - public PMAffiliation(String name, PMIdentifier identifier) { - this.name = name; - this.identifier = identifier; - } + } - public String getName() { - return name; - } + public PMAffiliation(String name, PMIdentifier identifier) { + this.name = name; + this.identifier = identifier; + } - public void setName(String name) { - this.name = name; - } + public String getName() { + return name; + } - public PMIdentifier getIdentifier() { - return identifier; - } + public void setName(String name) { + this.name = name; + } - public PMAffiliation setIdentifier(PMIdentifier identifier) { - this.identifier = identifier; - return this; - } + public PMIdentifier getIdentifier() { + return identifier; + } + + public PMAffiliation setIdentifier(PMIdentifier identifier) { + this.identifier = identifier; + return this; + } } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMAuthor.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMAuthor.java index b0df25663..e023f2e62 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMAuthor.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMAuthor.java @@ -97,5 +97,4 @@ public class PMAuthor implements Serializable { this.affiliation = affiliation; } - } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMIdentifier.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMIdentifier.java index 0c8c55e40..6cd17a90c 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMIdentifier.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMIdentifier.java @@ -1,53 +1,53 @@ + package eu.dnetlib.dhp.sx.bio.pubmed; public class PMIdentifier { - private String pid; - private String type; + private String pid; + private String type; + public PMIdentifier(String pid, String type) { + this.pid = cleanPid(pid); + this.type = type; + } - public PMIdentifier(String pid, String type) { - this.pid = cleanPid(pid); - this.type = type; - } + public PMIdentifier() { - public PMIdentifier() { + } - } + private String cleanPid(String pid) { - private String cleanPid(String pid) { + if (pid == null) { + return null; + } - if (pid == null) { - return null; - } + // clean ORCID ID in the form 0000000163025705 to 0000-0001-6302-5705 + if (pid.matches("[0-9]{15}[0-9X]")) { + return pid.replaceAll("(.{4})(.{4})(.{4})(.{4})", "$1-$2-$3-$4"); + } - // clean ORCID ID in the form 0000000163025705 to 0000-0001-6302-5705 - if (pid.matches("[0-9]{15}[0-9X]")) { - return pid.replaceAll("(.{4})(.{4})(.{4})(.{4})", "$1-$2-$3-$4"); - } + // clean ORCID in the form http://orcid.org/0000-0001-8567-3543 to 0000-0001-8567-3543 + if (pid.matches("http://orcid.org/[0-9]{4}-[0-9]{4}-[0-9]{4}-[0-9]{4}")) { + return pid.replaceAll("http://orcid.org/", ""); + } + return pid; + } - // clean ORCID in the form http://orcid.org/0000-0001-8567-3543 to 0000-0001-8567-3543 - if (pid.matches("http://orcid.org/[0-9]{4}-[0-9]{4}-[0-9]{4}-[0-9]{4}")) { - return pid.replaceAll("http://orcid.org/", ""); - } - return pid; - } + public String getPid() { + return pid; + } - public String getPid() { - return pid; - } + public PMIdentifier setPid(String pid) { + this.pid = cleanPid(pid); + return this; + } - public PMIdentifier setPid(String pid) { - this.pid = cleanPid(pid); - return this; - } + public String getType() { + 
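
The two rewrites in cleanPid() above deserve a worked example. A quick sketch of what each regex does, reusing the sample values from the code comments:

	class OrcidCleanSketch {
		public static void main(String[] args) {
			// 16-character bare ORCID: insert dashes every four characters
			System.out.println("0000000163025705".replaceAll("(.{4})(.{4})(.{4})(.{4})", "$1-$2-$3-$4"));
			// -> 0000-0001-6302-5705

			// URL-prefixed ORCID: strip the resolver prefix
			System.out.println("http://orcid.org/0000-0001-8567-3543".replaceAll("http://orcid.org/", ""));
			// -> 0000-0001-8567-3543
		}
	}

Only values matching [0-9]{15}[0-9X] take the first branch, so ORCIDs ending in the X check digit are covered too; the prefix in the second call is interpreted as a regular expression, where the dots happen to match the literal characters. Anything matching neither pattern is returned unchanged.
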
return type; + } - public String getType() { - return type; - } - - public PMIdentifier setType(String type) { - this.type = type; - return this; - } + public PMIdentifier setType(String type) { + this.type = type; + return this; + } } diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/raid/oozie_app/action_set_parameters.json b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/raid/oozie_app/action_set_parameters.json deleted file mode 100644 index e69de29bb..000000000 diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/raid/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/raid/oozie_app/workflow.xml index 9b5aa5905..d3392596f 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/raid/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/raid/oozie_app/workflow.xml @@ -20,8 +20,6 @@ - - diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/collection/crossref/Crossref2Oaf.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/collection/crossref/Crossref2Oaf.scala index e4a238c8f..de68ebb58 100644 --- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/collection/crossref/Crossref2Oaf.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/collection/crossref/Crossref2Oaf.scala @@ -673,7 +673,6 @@ case object Crossref2Oaf { val doi = input.getString(0) val rorId = input.getString(1) - val pubId = IdentifierFactory.idFromPid("50", "doi", DoiCleaningRule.clean(doi), true) val affId = GenerateRorActionSetJob.calculateOpenaireId(rorId) diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/pubmed/PMParser2.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/pubmed/PMParser2.scala index 2eb4bea65..bc9a2cf02 100644 --- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/pubmed/PMParser2.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/pubmed/PMParser2.scala @@ -82,21 +82,22 @@ class PMParser2 { a.setLastName((author \ "LastName").text) a.setForeName((author \ "ForeName").text) val id = (author \ "Identifier").text - val idType =(author \ "Identifier" \ "@Source").text + val idType = (author \ "Identifier" \ "@Source").text - if(id != null && id.nonEmpty && idType != null && idType.nonEmpty) { + if (id != null && id.nonEmpty && idType != null && idType.nonEmpty) { a.setIdentifier(new PMIdentifier(id, idType)) } - val affiliation = (author \ "AffiliationInfo" \ "Affiliation").text - val affiliationId = (author \ "AffiliationInfo" \ "Identifier").text + val affiliationId = (author \ "AffiliationInfo" \ "Identifier").text val affiliationIdType = (author \ "AffiliationInfo" \ "Identifier" \ "@Source").text - if(affiliation != null && affiliation.nonEmpty) { + if (affiliation != null && affiliation.nonEmpty) { val aff = new PMAffiliation() aff.setName(affiliation) - if(affiliationId != null && affiliationId.nonEmpty && affiliationIdType != null && affiliationIdType.nonEmpty) { + if ( + affiliationId != null && affiliationId.nonEmpty && affiliationIdType != null && affiliationIdType.nonEmpty + ) { aff.setIdentifier(new PMIdentifier(affiliationId, affiliationIdType)) } a.setAffiliation(aff) diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/pubmed/PubMedToOaf.scala 
b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/pubmed/PubMedToOaf.scala index 5e14c731a..281ca0e07 100644 --- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/pubmed/PubMedToOaf.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/pubmed/PubMedToOaf.scala @@ -294,11 +294,23 @@ object PubMedToOaf { author.setName(a.getForeName) author.setSurname(a.getLastName) author.setFullname(a.getFullName) - if(a.getIdentifier != null) { - author.setPid(List(OafMapperUtils.structuredProperty(a.getIdentifier.getPid, - OafMapperUtils.qualifier(a.getIdentifier.getType,a.getIdentifier.getType,ModelConstants.DNET_PID_TYPES, ModelConstants.DNET_PID_TYPES), dataInfo)).asJava) + if (a.getIdentifier != null) { + author.setPid( + List( + OafMapperUtils.structuredProperty( + a.getIdentifier.getPid, + OafMapperUtils.qualifier( + a.getIdentifier.getType, + a.getIdentifier.getType, + ModelConstants.DNET_PID_TYPES, + ModelConstants.DNET_PID_TYPES + ), + dataInfo + ) + ).asJava + ) } - if (a.getAffiliation!= null) + if (a.getAffiliation != null) author.setRawAffiliationString(List(a.getAffiliation.getName).asJava) author.setRank(index + 1) author diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/raid/GenerateRAiDActionSetJobTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/raid/GenerateRAiDActionSetJobTest.java index 1f33f45b2..9417822af 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/raid/GenerateRAiDActionSetJobTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/raid/GenerateRAiDActionSetJobTest.java @@ -1,11 +1,16 @@ + package eu.dnetlib.dhp.actionmanager.raid; -import eu.dnetlib.dhp.actionmanager.opencitations.CreateOpenCitationsASTest; -import eu.dnetlib.dhp.actionmanager.raid.model.RAiDEntity; -import eu.dnetlib.dhp.schema.action.AtomicAction; -import eu.dnetlib.dhp.schema.oaf.Oaf; -import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct; -import eu.dnetlib.dhp.schema.oaf.Relation; +import static java.nio.file.Files.createTempDirectory; + +import static eu.dnetlib.dhp.actionmanager.Constants.OBJECT_MAPPER; +import static org.junit.jupiter.api.Assertions.assertEquals; + +import java.io.File; +import java.nio.file.Paths; +import java.util.Arrays; +import java.util.List; + import org.apache.commons.io.FileUtils; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat; @@ -20,93 +25,141 @@ import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; + +import eu.dnetlib.dhp.actionmanager.opencitations.CreateOpenCitationsASTest; +import eu.dnetlib.dhp.actionmanager.raid.model.RAiDEntity; +import eu.dnetlib.dhp.schema.action.AtomicAction; +import eu.dnetlib.dhp.schema.oaf.Oaf; +import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct; +import eu.dnetlib.dhp.schema.oaf.Relation; import scala.Tuple2; -import java.io.File; -import java.nio.file.Paths; -import java.util.Arrays; -import java.util.List; - -import static eu.dnetlib.dhp.actionmanager.Constants.OBJECT_MAPPER; -import static java.nio.file.Files.createTempDirectory; -import static org.junit.jupiter.api.Assertions.assertEquals; - public class GenerateRAiDActionSetJobTest { - private static String input_path; - private static String output_path; - static SparkSession spark; + private static String input_path; + private 
static String output_path; + static SparkSession spark; - @BeforeEach - void setUp() throws Exception { + @BeforeEach + void setUp() throws Exception { - input_path = Paths - .get(GenerateRAiDActionSetJobTest.class.getResource("/eu/dnetlib/dhp/actionmanager/raid/raid_example.json").toURI()) - .toFile() - .getAbsolutePath(); + input_path = Paths + .get( + GenerateRAiDActionSetJobTest.class + .getResource("/eu/dnetlib/dhp/actionmanager/raid/raid_example.json") + .toURI()) + .toFile() + .getAbsolutePath(); - output_path = createTempDirectory(GenerateRAiDActionSetJobTest.class.getSimpleName() + "-") - .toAbsolutePath() - .toString(); + output_path = createTempDirectory(GenerateRAiDActionSetJobTest.class.getSimpleName() + "-") + .toAbsolutePath() + .toString(); - SparkConf conf = new SparkConf(); - conf.setAppName(GenerateRAiDActionSetJobTest.class.getSimpleName()); + SparkConf conf = new SparkConf(); + conf.setAppName(GenerateRAiDActionSetJobTest.class.getSimpleName()); - conf.setMaster("local[*]"); - conf.set("spark.driver.host", "localhost"); - conf.set("hive.metastore.local", "true"); - conf.set("spark.ui.enabled", "false"); - conf.set("spark.sql.warehouse.dir", output_path); - conf.set("hive.metastore.warehouse.dir", output_path); + conf.setMaster("local[*]"); + conf.set("spark.driver.host", "localhost"); + conf.set("hive.metastore.local", "true"); + conf.set("spark.ui.enabled", "false"); + conf.set("spark.sql.warehouse.dir", output_path); + conf.set("hive.metastore.warehouse.dir", output_path); - spark = SparkSession - .builder() - .appName(GenerateRAiDActionSetJobTest.class.getSimpleName()) - .config(conf) - .getOrCreate(); - } + spark = SparkSession + .builder() + .appName(GenerateRAiDActionSetJobTest.class.getSimpleName()) + .config(conf) + .getOrCreate(); + } - @AfterAll - static void cleanUp() throws Exception { - FileUtils.deleteDirectory(new File(output_path)); - } + @AfterAll + static void cleanUp() throws Exception { + FileUtils.deleteDirectory(new File(output_path)); + } - @Test - @Disabled - void testProcessRAiDEntities() { - GenerateRAiDActionSetJob.processRAiDEntities(spark, input_path, output_path + "/test_raid_action_set"); + @Test + @Disabled + void testProcessRAiDEntities() { + GenerateRAiDActionSetJob.processRAiDEntities(spark, input_path, output_path + "/test_raid_action_set"); - JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - JavaRDD result = sc - .sequenceFile(output_path + "/test_raid_action_set", Text.class, Text.class) - .map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class)) - .map(AtomicAction::getPayload); + JavaRDD result = sc + .sequenceFile(output_path + "/test_raid_action_set", Text.class, Text.class) + .map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class)) + .map(AtomicAction::getPayload); - assertEquals(80, result.count()); - } + assertEquals(80, result.count()); + } - @Test - void testPrepareRAiD() { + @Test + void testPrepareRAiD() { - List> atomicActions = GenerateRAiDActionSetJob.prepareRAiD(new RAiDEntity( - "-92190526", - Arrays.asList("Berli, Justin", "Le Mao, Bérénice", "Guillaume Touya", "Wenclik, Laura", "Courtial, Azelle", "Muehlenhaus, Ian", "Justin Berli", "Touya, Guillaume", "Gruget, Maïeul", "Azelle Courtial", "Ian Muhlenhaus", "Maïeul Gruget", "Marion Dumont", "Maïeul GRUGET", "Cécile Duchêne"), - "2021-09-10", - "2024-02-16", - Arrays.asList("cartography, zoom, pan, desert 
fog", "Road network", "zooming", "Pan-scalar maps", "pan-scalar map", "Python library", "QGIS", "map design", "landmarks", "Cartes transscalaires", "anchor", "disorientation", "[INFO]Computer Science [cs]", "[SHS.GEO]Humanities and Social Sciences/Geography", "cognitive cartography", "eye-tracking", "Computers in Earth Sciences", "Topographic map", "National Mapping Agency", "General Medicine", "Geography, Planning and Development", "multi-scales", "pan-scalar maps", "Selection", "cartography", "General Earth and Planetary Sciences", "progressiveness", "map generalisation", "Eye-tracker", "zoom", "algorithms", "Map Design", "cartography, map generalisation, zoom, multi-scale map", "Interactive maps", "Map generalisation", "Earth and Planetary Sciences (miscellaneous)", "Cartographic generalization", "rivers", "Benchmark", "General Environmental Science", "open source", "drawing", "Constraint", "Multi-scale maps"), - Arrays.asList("Where do people look at during multi-scale map tasks?", "FogDetector survey raw data", "Collection of cartographic disorientation stories", "Anchorwhat dataset", "BasqueRoads: A Benchmark for Road Network Selection", "Progressive river network selection for pan-scalar maps", "BasqueRoads, a dataset to benchmark road selection algorithms", "Missing the city for buildings? A critical review of pan-scalar map generalization and design in contemporary zoomable maps", "Empirical approach to advance the generalisation of multi-scale maps", "L'Alpe d'Huez: a dataset to benchmark topographic map generalisation", "eye-tracking data from a survey on zooming in a pan-scalar map", "Material of the experiment 'More is Less' from the MapMuxing project", "Cartagen4py, an open source Python library for map generalisation", "L’Alpe d’Huez: A Benchmark for Topographic Map Generalisation"), - Arrays.asList("50|doi_dedup___::6915135e0aa39f913394513f809ae58a", "50|doi_dedup___::754e3c283639bc6e104c925ff3e34007", "50|doi_dedup___::13517477f3c1261d57a3364363ce6ce0", "50|doi_dedup___::675b16c73accc4e7242bbb4ed9b3724a", "50|doi_dedup___::94ce09906b2d7d37eb2206cea8a50153", "50|dedup_wf_002::cc575d5ca5651ff8c3029a3a76e7e70a", "50|doi_dedup___::c5e52baddda17c755d1bae012a97dc13", "50|doi_dedup___::4f5f38c9e08fe995f7278963183f8ad4", "50|doi_dedup___::a9bc4453273b2d02648a5cb453195042", "50|doi_dedup___::5e893dc0cb7624a33f41c9b428bd59f7", "50|doi_dedup___::c1ecdef48fd9be811a291deed950e1c5", "50|doi_dedup___::9e93c8f2d97c35de8a6a57a5b53ef283", "50|dedup_wf_002::d08be0ed27b13d8a880e891e08d093ea", "50|doi_dedup___::f8d8b3b9eddeca2fc0e3bc9e63996555"), - "Exploring Multi-Scale Map Generalization and Design", - "This project aims to advance the generalization of multi-scale maps by investigating the impact of different design elements on user experience. The research involves collecting and analyzing data from various sources, including surveys, eye-tracking studies, and user experiments. The goal is to identify best practices for map generalization and design, with a focus on reducing disorientation and improving information retrieval during exploration. The project has led to the development of several datasets, including BasqueRoads, AnchorWhat, and L'Alpe d'Huez, which can be used to benchmark road selection algorithms and topographic map generalization techniques. The research has also resulted in the creation of a Python library, Cartagen4py, for map generalization. 
The findings of this project have the potential to improve the design and usability of multi-scale maps, making them more effective tools for navigation and information retrieval." - )); + List> atomicActions = GenerateRAiDActionSetJob + .prepareRAiD( + new RAiDEntity( + "-92190526", + Arrays + .asList( + "Berli, Justin", "Le Mao, Bérénice", "Guillaume Touya", "Wenclik, Laura", + "Courtial, Azelle", "Muehlenhaus, Ian", "Justin Berli", "Touya, Guillaume", + "Gruget, Maïeul", "Azelle Courtial", "Ian Muhlenhaus", "Maïeul Gruget", "Marion Dumont", + "Maïeul GRUGET", "Cécile Duchêne"), + "2021-09-10", + "2024-02-16", + Arrays + .asList( + "cartography, zoom, pan, desert fog", "Road network", "zooming", "Pan-scalar maps", + "pan-scalar map", "Python library", "QGIS", "map design", "landmarks", + "Cartes transscalaires", "anchor", "disorientation", "[INFO]Computer Science [cs]", + "[SHS.GEO]Humanities and Social Sciences/Geography", "cognitive cartography", + "eye-tracking", "Computers in Earth Sciences", "Topographic map", "National Mapping Agency", + "General Medicine", "Geography, Planning and Development", "multi-scales", + "pan-scalar maps", "Selection", "cartography", "General Earth and Planetary Sciences", + "progressiveness", "map generalisation", "Eye-tracker", "zoom", "algorithms", "Map Design", + "cartography, map generalisation, zoom, multi-scale map", "Interactive maps", + "Map generalisation", "Earth and Planetary Sciences (miscellaneous)", + "Cartographic generalization", "rivers", "Benchmark", "General Environmental Science", + "open source", "drawing", "Constraint", "Multi-scale maps"), + Arrays + .asList( + "Where do people look at during multi-scale map tasks?", "FogDetector survey raw data", + "Collection of cartographic disorientation stories", "Anchorwhat dataset", + "BasqueRoads: A Benchmark for Road Network Selection", + "Progressive river network selection for pan-scalar maps", + "BasqueRoads, a dataset to benchmark road selection algorithms", + "Missing the city for buildings? A critical review of pan-scalar map generalization and design in contemporary zoomable maps", + "Empirical approach to advance the generalisation of multi-scale maps", + "L'Alpe d'Huez: a dataset to benchmark topographic map generalisation", + "eye-tracking data from a survey on zooming in a pan-scalar map", + "Material of the experiment 'More is Less' from the MapMuxing project", + "Cartagen4py, an open source Python library for map generalisation", + "L’Alpe d’Huez: A Benchmark for Topographic Map Generalisation"), + Arrays + .asList( + "50|doi_dedup___::6915135e0aa39f913394513f809ae58a", + "50|doi_dedup___::754e3c283639bc6e104c925ff3e34007", + "50|doi_dedup___::13517477f3c1261d57a3364363ce6ce0", + "50|doi_dedup___::675b16c73accc4e7242bbb4ed9b3724a", + "50|doi_dedup___::94ce09906b2d7d37eb2206cea8a50153", + "50|dedup_wf_002::cc575d5ca5651ff8c3029a3a76e7e70a", + "50|doi_dedup___::c5e52baddda17c755d1bae012a97dc13", + "50|doi_dedup___::4f5f38c9e08fe995f7278963183f8ad4", + "50|doi_dedup___::a9bc4453273b2d02648a5cb453195042", + "50|doi_dedup___::5e893dc0cb7624a33f41c9b428bd59f7", + "50|doi_dedup___::c1ecdef48fd9be811a291deed950e1c5", + "50|doi_dedup___::9e93c8f2d97c35de8a6a57a5b53ef283", + "50|dedup_wf_002::d08be0ed27b13d8a880e891e08d093ea", + "50|doi_dedup___::f8d8b3b9eddeca2fc0e3bc9e63996555"), + "Exploring Multi-Scale Map Generalization and Design", + "This project aims to advance the generalization of multi-scale maps by investigating the impact of different design elements on user experience. 
The research involves collecting and analyzing data from various sources, including surveys, eye-tracking studies, and user experiments. The goal is to identify best practices for map generalization and design, with a focus on reducing disorientation and improving information retrieval during exploration. The project has led to the development of several datasets, including BasqueRoads, AnchorWhat, and L'Alpe d'Huez, which can be used to benchmark road selection algorithms and topographic map generalization techniques. The research has also resulted in the creation of a Python library, Cartagen4py, for map generalization. The findings of this project have the potential to improve the design and usability of multi-scale maps, making them more effective tools for navigation and information retrieval.")); - OtherResearchProduct orp = (OtherResearchProduct) atomicActions.get(0).getPayload(); - Relation rel = (Relation) atomicActions.get(1).getPayload(); + OtherResearchProduct orp = (OtherResearchProduct) atomicActions.get(0).getPayload(); + Relation rel = (Relation) atomicActions.get(1).getPayload(); - assertEquals("Exploring Multi-Scale Map Generalization and Design", orp.getTitle().get(0).getValue()); - assertEquals("50|raid________::759a564ce5cc7360cab030c517c7366b", rel.getSource()); - assertEquals("50|doi_dedup___::6915135e0aa39f913394513f809ae58a", rel.getTarget()); + assertEquals("Exploring Multi-Scale Map Generalization and Design", orp.getTitle().get(0).getValue()); + assertEquals("50|raid________::759a564ce5cc7360cab030c517c7366b", rel.getSource()); + assertEquals("50|doi_dedup___::6915135e0aa39f913394513f809ae58a", rel.getTarget()); - } + } } diff --git a/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/sx/bio/BioScholixTest.scala b/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/sx/bio/BioScholixTest.scala index 4a926df01..cb7826dbf 100644 --- a/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/sx/bio/BioScholixTest.scala +++ b/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/sx/bio/BioScholixTest.scala @@ -63,7 +63,6 @@ class BioScholixTest extends AbstractVocabularyTest { "0000000333457333", "0000000335964515", "0000000302921949", - "http://orcid.org/0000-0001-8567-3543", "http://orcid.org/0000-0001-7868-8528", "0000-0001-9189-1440", diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java index 64cbd70ba..0d6c81627 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java @@ -130,7 +130,7 @@ public class ResultTagger implements Serializable { // log.info("Remove constraints for " + communityId); if (conf.getRemoveConstraintsMap().keySet().contains(communityId) && conf.getRemoveConstraintsMap().get(communityId).getCriteria() != null && - !conf.getRemoveConstraintsMap().get(communityId).getCriteria().isEmpty() && + !conf.getRemoveConstraintsMap().get(communityId).getCriteria().isEmpty() && conf .getRemoveConstraintsMap() .get(communityId) @@ -228,7 +228,7 @@ public class ResultTagger implements Serializable { .forEach(communityId -> { if (!removeCommunities.contains(communityId) && conf.getSelectionConstraintsMap().get(communityId).getCriteria() != null && - !conf.getSelectionConstraintsMap().get(communityId).getCriteria().isEmpty() && + 
!conf.getSelectionConstraintsMap().get(communityId).getCriteria().isEmpty() && conf .getSelectionConstraintsMap() .get(communityId) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java index ea9503d17..e1710db54 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java @@ -915,7 +915,8 @@ class MappersTest { @Test void testODFRecord_guidelines4() throws IOException { - final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("odf_guidelines4.xml"))); + final String xml = IOUtils + .toString(Objects.requireNonNull(getClass().getResourceAsStream("odf_guidelines4.xml"))); final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); final Publication p = (Publication) list.get(0); diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/ProvisionModelSupport.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/ProvisionModelSupport.java index 0da0f6955..2c977a390 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/ProvisionModelSupport.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/ProvisionModelSupport.java @@ -5,7 +5,6 @@ import java.io.StringReader; import java.util.*; import java.util.stream.Collectors; -import eu.dnetlib.dhp.schema.solr.PersonTopic; import org.apache.commons.lang3.StringUtils; import org.dom4j.Document; import org.dom4j.DocumentException; @@ -40,6 +39,7 @@ import eu.dnetlib.dhp.schema.solr.OpenAccessColor; import eu.dnetlib.dhp.schema.solr.OpenAccessRoute; import eu.dnetlib.dhp.schema.solr.Organization; import eu.dnetlib.dhp.schema.solr.Person; +import eu.dnetlib.dhp.schema.solr.PersonTopic; import eu.dnetlib.dhp.schema.solr.Pid; import eu.dnetlib.dhp.schema.solr.Project; import eu.dnetlib.dhp.schema.solr.Result; @@ -216,11 +216,14 @@ public class ProvisionModelSupport { } private static List mapPersonTopics(List subjects) { - return Optional.ofNullable(subjects) - .map(ss -> ss.stream() - .map(ProvisionModelSupport::mapPersonTopic) - .collect(Collectors.toList())) - .orElse(null); + return Optional + .ofNullable(subjects) + .map( + ss -> ss + .stream() + .map(ProvisionModelSupport::mapPersonTopic) + .collect(Collectors.toList())) + .orElse(null); } private static PersonTopic mapPersonTopic(eu.dnetlib.dhp.schema.oaf.PersonTopic pt) { From fed13e083e8d9370d7d16885c17c13a8aa1a8d46 Mon Sep 17 00:00:00 2001 From: Giambattista Bloisi Date: Thu, 5 Dec 2024 15:21:32 +0100 Subject: [PATCH 47/73] Fix: do not import joda formatting --- .../java/eu/dnetlib/pace/tree/DateRange.java | 88 +++++++++---------- .../eu/dnetlib/pace/tree/JsonListMatch.java | 2 +- .../pace/comparators/ComparatorTest.java | 30 +++++-- .../dhp/bulktag/community/ResultTagger.java | 4 +- .../dnetlib/dhp/oa/graph/raw/MappersTest.java | 3 +- .../model/ProvisionModelSupport.java | 15 ++-- 6 files changed, 83 insertions(+), 59 deletions(-) diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/DateRange.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/DateRange.java index c913109a4..6349c944b 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/DateRange.java +++ 
b/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/DateRange.java @@ -1,10 +1,5 @@ -package eu.dnetlib.pace.tree; -import com.wcohen.ss.AbstractStringDistance; -import eu.dnetlib.pace.config.Config; -import eu.dnetlib.pace.tree.support.AbstractStringComparator; -import eu.dnetlib.pace.tree.support.ComparatorClass; -import org.joda.time.DateTime; +package eu.dnetlib.pace.tree; import java.time.DateTimeException; import java.time.LocalDate; @@ -13,55 +8,60 @@ import java.time.format.DateTimeFormatter; import java.util.Locale; import java.util.Map; +import com.wcohen.ss.AbstractStringDistance; + +import eu.dnetlib.pace.config.Config; +import eu.dnetlib.pace.tree.support.AbstractStringComparator; +import eu.dnetlib.pace.tree.support.ComparatorClass; + @ComparatorClass("dateRange") public class DateRange extends AbstractStringComparator { - int YEAR_RANGE; + int YEAR_RANGE; - public DateRange(Map params) { - super(params, new com.wcohen.ss.JaroWinkler()); - YEAR_RANGE = Integer.parseInt(params.getOrDefault("year_range", "3")); - } + public DateRange(Map params) { + super(params, new com.wcohen.ss.JaroWinkler()); + YEAR_RANGE = Integer.parseInt(params.getOrDefault("year_range", "3")); + } - public DateRange(final double weight) { - super(weight, new com.wcohen.ss.JaroWinkler()); - } + public DateRange(final double weight) { + super(weight, new com.wcohen.ss.JaroWinkler()); + } - protected DateRange(final double weight, final AbstractStringDistance ssalgo) { - super(weight, ssalgo); - } + protected DateRange(final double weight, final AbstractStringDistance ssalgo) { + super(weight, ssalgo); + } - public static boolean isNumeric(String str) { - return str.matches("\\d+"); //match a number with optional '-' and decimal. - } + public static boolean isNumeric(String str) { + return str.matches("\\d+"); // match a number with optional '-' and decimal. + } - @Override - public double distance(final String a, final String b, final Config conf) { - if (a.isEmpty() || b.isEmpty()) { - return -1.0; // return -1 if a field is missing - } + @Override + public double distance(final String a, final String b, final Config conf) { + if (a.isEmpty() || b.isEmpty()) { + return -1.0; // return -1 if a field is missing + } - try { - DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd", Locale.ENGLISH); - LocalDate d1 = LocalDate.parse(a, formatter); - LocalDate d2 = LocalDate.parse(b, formatter); - Period period = Period.between(d1, d2); + try { + DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd", Locale.ENGLISH); + LocalDate d1 = LocalDate.parse(a, formatter); + LocalDate d2 = LocalDate.parse(b, formatter); + Period period = Period.between(d1, d2); - return period.getYears() <= YEAR_RANGE? 1.0 : 0.0; - } - catch (DateTimeException e) { - return -1.0; - } + return period.getYears() <= YEAR_RANGE ? 
1.0 : 0.0; + } catch (DateTimeException e) { + return -1.0; + } - } + } - @Override - public double getWeight() { - return super.weight; - } + @Override + public double getWeight() { + return super.weight; + } - @Override - protected double normalize(final double d) { - return d; - } + @Override + protected double normalize(final double d) { + return d; + } } diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/JsonListMatch.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/JsonListMatch.java index e95d9206e..d9558df90 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/JsonListMatch.java +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/JsonListMatch.java @@ -62,7 +62,7 @@ public class JsonListMatch extends AbstractListComparator { Set types = Sets.intersection(typesA, typesB); - if (types.isEmpty()) // if no common type, it is impossible to compare + if (types.isEmpty()) // if no common type, it is impossible to compare return -1; ca = ca.stream().filter(s -> types.contains(s.split("::")[0])).collect(Collectors.toSet()); diff --git a/dhp-pace-core/src/test/java/eu/dnetlib/pace/comparators/ComparatorTest.java b/dhp-pace-core/src/test/java/eu/dnetlib/pace/comparators/ComparatorTest.java index 83539de4a..0abde84bc 100644 --- a/dhp-pace-core/src/test/java/eu/dnetlib/pace/comparators/ComparatorTest.java +++ b/dhp-pace-core/src/test/java/eu/dnetlib/pace/comparators/ComparatorTest.java @@ -72,14 +72,34 @@ public class ComparatorTest extends AbstractPaceTest { CodeMatch codeMatch = new CodeMatch(params); // names have different codes - assertEquals(0.0, codeMatch.distance("physical oceanography at ctd station june 1998 ev02a", "physical oceanography at ctd station june 1998 ir02", conf)); + assertEquals( + 0.0, + codeMatch + .distance( + "physical oceanography at ctd station june 1998 ev02a", + "physical oceanography at ctd station june 1998 ir02", conf)); // names have same code - assertEquals(1.0, codeMatch.distance("physical oceanography at ctd station june 1998 ev02a", "physical oceanography at ctd station june 1998 ev02a", conf)); + assertEquals( + 1.0, + codeMatch + .distance( + "physical oceanography at ctd station june 1998 ev02a", + "physical oceanography at ctd station june 1998 ev02a", conf)); // code is not in both names - assertEquals(-1, codeMatch.distance("physical oceanography at ctd station june 1998", "physical oceanography at ctd station june 1998 ev02a", conf)); - assertEquals(1.0, codeMatch.distance("physical oceanography at ctd station june 1998", "physical oceanography at ctd station june 1998", conf)); + assertEquals( + -1, + codeMatch + .distance( + "physical oceanography at ctd station june 1998", + "physical oceanography at ctd station june 1998 ev02a", conf)); + assertEquals( + 1.0, + codeMatch + .distance( + "physical oceanography at ctd station june 1998", "physical oceanography at ctd station june 1998", + conf)); } @Test @@ -275,7 +295,7 @@ public class ComparatorTest extends AbstractPaceTest { Arrays .asList( "{\"datainfo\":{\"deletedbyinference\":false,\"inferenceprovenance\":null,\"inferred\":false,\"invisible\":false,\"provenanceaction\":{\"classid\":\"sysimport:actionset\",\"classname\":\"Harvested\",\"schemeid\":\"dnet:provenanceActions\",\"schemename\":\"dnet:provenanceActions\"},\"trust\":\"0.9\"},\"qualifier\":{\"classid\":\"grid\",\"classname\":\"GRID Identifier\",\"schemeid\":\"dnet:pid_types\",\"schemename\":\"dnet:pid_types\"},\"value\":\"grid_1\"}", - 
"{\"datainfo\":{\"deletedbyinference\":false,\"inferenceprovenance\":null,\"inferred\":false,\"invisible\":false,\"provenanceaction\":{\"classid\":\"sysimport:actionset\",\"classname\":\"Harvested\",\"schemeid\":\"dnet:provenanceActions\",\"schemename\":\"dnet:provenanceActions\"},\"trust\":\"0.9\"},\"qualifier\":{\"classid\":\"ror\",\"classname\":\"Research Organization Registry\",\"schemeid\":\"dnet:pid_types\",\"schemename\":\"dnet:pid_types\"},\"value\":\"ror_1\"}"), + "{\"datainfo\":{\"deletedbyinference\":false,\"inferenceprovenance\":null,\"inferred\":false,\"invisible\":false,\"provenanceaction\":{\"classid\":\"sysimport:actionset\",\"classname\":\"Harvested\",\"schemeid\":\"dnet:provenanceActions\",\"schemename\":\"dnet:provenanceActions\"},\"trust\":\"0.9\"},\"qualifier\":{\"classid\":\"ror\",\"classname\":\"Research Organization Registry\",\"schemeid\":\"dnet:pid_types\",\"schemename\":\"dnet:pid_types\"},\"value\":\"ror_1\"}"), "authors"); List b = createFieldList( Arrays diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java index 64cbd70ba..0d6c81627 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java @@ -130,7 +130,7 @@ public class ResultTagger implements Serializable { // log.info("Remove constraints for " + communityId); if (conf.getRemoveConstraintsMap().keySet().contains(communityId) && conf.getRemoveConstraintsMap().get(communityId).getCriteria() != null && - !conf.getRemoveConstraintsMap().get(communityId).getCriteria().isEmpty() && + !conf.getRemoveConstraintsMap().get(communityId).getCriteria().isEmpty() && conf .getRemoveConstraintsMap() .get(communityId) @@ -228,7 +228,7 @@ public class ResultTagger implements Serializable { .forEach(communityId -> { if (!removeCommunities.contains(communityId) && conf.getSelectionConstraintsMap().get(communityId).getCriteria() != null && - !conf.getSelectionConstraintsMap().get(communityId).getCriteria().isEmpty() && + !conf.getSelectionConstraintsMap().get(communityId).getCriteria().isEmpty() && conf .getSelectionConstraintsMap() .get(communityId) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java index ea9503d17..e1710db54 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java @@ -915,7 +915,8 @@ class MappersTest { @Test void testODFRecord_guidelines4() throws IOException { - final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("odf_guidelines4.xml"))); + final String xml = IOUtils + .toString(Objects.requireNonNull(getClass().getResourceAsStream("odf_guidelines4.xml"))); final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); final Publication p = (Publication) list.get(0); diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/ProvisionModelSupport.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/ProvisionModelSupport.java index 0da0f6955..2c977a390 100644 --- 
a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/ProvisionModelSupport.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/ProvisionModelSupport.java @@ -5,7 +5,6 @@ import java.io.StringReader; import java.util.*; import java.util.stream.Collectors; -import eu.dnetlib.dhp.schema.solr.PersonTopic; import org.apache.commons.lang3.StringUtils; import org.dom4j.Document; import org.dom4j.DocumentException; @@ -40,6 +39,7 @@ import eu.dnetlib.dhp.schema.solr.OpenAccessColor; import eu.dnetlib.dhp.schema.solr.OpenAccessRoute; import eu.dnetlib.dhp.schema.solr.Organization; import eu.dnetlib.dhp.schema.solr.Person; +import eu.dnetlib.dhp.schema.solr.PersonTopic; import eu.dnetlib.dhp.schema.solr.Pid; import eu.dnetlib.dhp.schema.solr.Project; import eu.dnetlib.dhp.schema.solr.Result; @@ -216,11 +216,14 @@ public class ProvisionModelSupport { } private static List mapPersonTopics(List subjects) { - return Optional.ofNullable(subjects) - .map(ss -> ss.stream() - .map(ProvisionModelSupport::mapPersonTopic) - .collect(Collectors.toList())) - .orElse(null); + return Optional + .ofNullable(subjects) + .map( + ss -> ss + .stream() + .map(ProvisionModelSupport::mapPersonTopic) + .collect(Collectors.toList())) + .orElse(null); } private static PersonTopic mapPersonTopic(eu.dnetlib.dhp.schema.oaf.PersonTopic pt) { From fd1038b44d10e17146d0aac51841818460872eca Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Fri, 6 Dec 2024 09:12:06 +0100 Subject: [PATCH 48/73] removed a sneaky break that was committed by mistake. --- .../java/eu/dnetlib/dhp/collection/orcid/ORCIDExtractor.java | 1 - 1 file changed, 1 deletion(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/orcid/ORCIDExtractor.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/orcid/ORCIDExtractor.java index 11f4c55d8..8172456bb 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/orcid/ORCIDExtractor.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/orcid/ORCIDExtractor.java @@ -154,7 +154,6 @@ public class ORCIDExtractor extends Thread { extractedItem++; if (extractedItem % 100000 == 0) { log.info("Thread {}: Extracted {} items", id, extractedItem); - break; } } } From 1c144a4dcb951319ed88a7cac4825837c5385316 Mon Sep 17 00:00:00 2001 From: miconis Date: Fri, 6 Dec 2024 09:18:10 +0100 Subject: [PATCH 49/73] minor change --- .../dhp/actionmanager/raid/GenerateRAiDActionSetJob.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/raid/GenerateRAiDActionSetJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/raid/GenerateRAiDActionSetJob.java index 3b2405956..c82934cdb 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/raid/GenerateRAiDActionSetJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/raid/GenerateRAiDActionSetJob.java @@ -43,7 +43,8 @@ public class GenerateRAiDActionSetJob { private static final List RAID_COLLECTED_FROM = listKeyValues( OPENAIRE_DATASOURCE_ID, OPENAIRE_DATASOURCE_NAME); - private static final Qualifier RAID_QUALIFIER = qualifier("0049", "Research Activity Identifier", DNET_PUBLICATION_RESOURCE, DNET_PUBLICATION_RESOURCE); + private static final Qualifier RAID_QUALIFIER = qualifier( + "0049", "Research Activity Identifier", 
DNET_PUBLICATION_RESOURCE, DNET_PUBLICATION_RESOURCE); private static final Qualifier RAID_INFERENCE_QUALIFIER = qualifier( "raid:openaireinference", "Inferred by OpenAIRE", DNET_PROVENANCE_ACTIONS, DNET_PROVENANCE_ACTIONS); From dade7d5bb86d4030fc2b69f3a26940e055e216eb Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 6 Dec 2024 10:02:07 +0100 Subject: [PATCH 50/73] minor changes --- .../java/eu/dnetlib/dhp/common/Constants.java | 5 ++-- .../raid/GenerateRAiDActionSetJob.java | 27 +++++++------------ 2 files changed, 13 insertions(+), 19 deletions(-) diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/Constants.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/Constants.java index b00199ea5..6a4bb34d3 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/Constants.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/Constants.java @@ -11,8 +11,9 @@ public class Constants { public static final Map coarCodeLabelMap = Maps.newHashMap(); public static final String RAID_NS_PREFIX = "raid________"; - public static final String RAID_DATASOURCE_NAME = "Research Activity Identifier Service (RAiD)"; - public static final String RAID_OPENAIRE_ID = ""; + + public static final String END_DATE = "endDate"; + public static final String START_DATE = "startDate"; public static final String ROR_NS_PREFIX = "ror_________"; diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/raid/GenerateRAiDActionSetJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/raid/GenerateRAiDActionSetJob.java index c82934cdb..e67e7171f 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/raid/GenerateRAiDActionSetJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/raid/GenerateRAiDActionSetJob.java @@ -3,6 +3,7 @@ package eu.dnetlib.dhp.actionmanager.raid; import static eu.dnetlib.dhp.actionmanager.personentity.ExtractPerson.OPENAIRE_DATASOURCE_ID; import static eu.dnetlib.dhp.actionmanager.personentity.ExtractPerson.OPENAIRE_DATASOURCE_NAME; +import static eu.dnetlib.dhp.common.Constants.*; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import static eu.dnetlib.dhp.schema.common.ModelConstants.*; import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.*; @@ -24,7 +25,6 @@ import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.actionmanager.raid.model.RAiDEntity; import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.common.Constants; import eu.dnetlib.dhp.common.HdfsSupport; import eu.dnetlib.dhp.schema.action.AtomicAction; import eu.dnetlib.dhp.schema.common.ModelConstants; @@ -120,8 +120,10 @@ public class GenerateRAiDActionSetJob { qualifier("main title", "main title", DNET_DATACITE_TITLE, DNET_DATACITE_TITLE), RAID_DATA_INFO))); orp.setDescription(listFields(RAID_DATA_INFO, r.getSummary())); -// orp.setAuthor(createAuthors(r.getAuthors())); - orp.setInstance(Collections.singletonList(eu.dnetlib.dhp.actionmanager.Constants.getInstance(RAID_QUALIFIER))); + + Instance instance = new Instance(); + instance.setInstancetype(RAID_QUALIFIER); + orp.setInstance(Collections.singletonList(instance)); orp .setSubject( r @@ -140,11 +142,11 @@ public class GenerateRAiDActionSetJob { Arrays .asList( structuredProperty( - r.getEndDate(), qualifier("endDate", "endDate", DNET_DATACITE_DATE, DNET_DATACITE_DATE), + r.getEndDate(), qualifier(END_DATE, END_DATE, DNET_DATACITE_DATE, DNET_DATACITE_DATE), 
RAID_DATA_INFO), structuredProperty( r.getStartDate(), - qualifier("startDate", "startDate", DNET_DATACITE_DATE, DNET_DATACITE_DATE), + qualifier(START_DATE, START_DATE, DNET_DATACITE_DATE, DNET_DATACITE_DATE), RAID_DATA_INFO))); orp.setLastupdatetimestamp(now.getTime()); orp.setDateofacceptance(field(r.getStartDate(), RAID_DATA_INFO)); @@ -159,11 +161,7 @@ public class GenerateRAiDActionSetJob { ModelConstants.RESULT_RESULT, PART, HAS_PART, - RAID_COLLECTED_FROM, - RAID_DATA_INFO, - now.getTime(), - null, - null); + orp); Relation rel2 = OafMapperUtils .getRelation( resultId, @@ -171,11 +169,7 @@ public class GenerateRAiDActionSetJob { ModelConstants.RESULT_RESULT, PART, IS_PART_OF, - RAID_COLLECTED_FROM, - RAID_DATA_INFO, - now.getTime(), - null, - null); + orp); res.add(new AtomicAction<>(Relation.class, rel1)); res.add(new AtomicAction<>(Relation.class, rel2)); } @@ -184,7 +178,7 @@ public class GenerateRAiDActionSetJob { } public static String calculateOpenaireId(final String raid) { - return String.format("50|%s::%s", Constants.RAID_NS_PREFIX, DHPUtils.md5(raid)); + return String.format("50|%s::%s", RAID_NS_PREFIX, DHPUtils.md5(raid)); } public static List createAuthors(final List author) { @@ -204,7 +198,6 @@ public class GenerateRAiDActionSetJob { .json(path) .as(Encoders.bean(RAiDEntity.class)) .toJavaRDD(); - } } From 8a5ba8df45d6fb1b570853307fb99f465d0667f5 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 6 Dec 2024 10:03:11 +0100 Subject: [PATCH 51/73] minor changes --- .../main/java/eu/dnetlib/dhp/actionmanager/Constants.java | 6 ------ 1 file changed, 6 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/Constants.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/Constants.java index 394cc22a3..d7ad7fcb9 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/Constants.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/Constants.java @@ -112,12 +112,6 @@ public class Constants { } - public static Instance getInstance(Qualifier qualifier) { - Instance instance = new Instance(); - instance.setInstancetype(qualifier); - return instance; - } - public static void removeOutputDir(SparkSession spark, String path) { HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration()); } From ee84db7a6a5ff1e271f5d6a0cfdfcc07e1a59ac9 Mon Sep 17 00:00:00 2001 From: Miriam Baglioni Date: Fri, 6 Dec 2024 12:20:13 +0100 Subject: [PATCH 52/73] [communityfromsemrelpropagation] added filtering to remove the deletedbyinference and invisible results --- .../PrepareResultCommunitySetStep1.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep1.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep1.java index 5af2bf481..764390442 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep1.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep1.java @@ -70,7 +70,7 @@ public class PrepareResultCommunitySetStep1 { private static final String RESULT_WITH_CONTEXT = "select id, collect_set(co.id) community_context \n" + " from result " + " lateral view explode (context) c as co " + - " where 
datainfo.deletedbyinference = false AND lower(co.id) IN %s" + + " where lower(co.id) IN %s" + " group by id"; private static final String RESULT_PATENT = "select id " + @@ -160,7 +160,8 @@ public class PrepareResultCommunitySetStep1 { Dataset relation = readPath(spark, inputRelationPath, Relation.class); relation.createOrReplaceTempView("relation"); - Dataset result = readPath(spark, inputResultPath, resultClazz); + Dataset result = readPath(spark, inputResultPath, resultClazz) + .where("datainfo.deletedbyinference != true AND datainfo.invisible != true"); result.createOrReplaceTempView("result"); final String outputResultPath = outputPath + "/" + resultType; From 666155bafaa04db7153c98d91b911d88e9cb2f01 Mon Sep 17 00:00:00 2001 From: Miriam Baglioni Date: Fri, 6 Dec 2024 12:26:41 +0100 Subject: [PATCH 53/73] [communityfromsemrelpropagation] changed resource to have deletedbyinference = false. --- .../graph/publication/part-00000.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/graph/publication/part-00000.json b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/graph/publication/part-00000.json index 7957bcfd5..ae9083c65 100644 --- a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/graph/publication/part-00000.json +++ b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/graph/publication/part-00000.json @@ -7,7 +7,7 @@ {"author":[{"fullname":"Turkia, Heidi","name":"Heidi","pid":[],"rank":1,"surname":"Turkia"},{"fullname":"Sirén, Heli","name":"Heli","pid":[],"rank":2,"surname":"Sirén"},{"fullname":"Penttilä, Merja","name":"Merja","pid":[],"rank":3,"surname":"Penttilä"},{"fullname":"Pitkänen, Juha Pekka","name":"Juha Pekka","pid":[],"rank":4,"surname":"Pitkänen"}],"bestaccessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2013-02-22"},"dateofcollection":"2022-02-28T12:29:51.291Z","dateoftransformation":"2022-02-28T16:19:35.201Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"
Hydrolysates of lignocellulosic biomass, used as substrates for the sustainable production of fuels and chemicals often contain high amounts of phenolic compounds inhibiting the production microbiota. Quantification of these inhibitor compounds may help to understand possible difficulties in bioprocessing and further the development of more efficient, robust and tolerable processes. A separation method based on capillary electrophoresis with UV detection was developed for the simultaneous quantification of 10 phenolic compounds that may have inhibitor properties. Intraday relative standard deviations were less than 0.7% for migration times and between 2.6% and 6.4% for peak areas. Interday relative standard deviations were less than 3.0% for migration times and between 5.0% and 7.2% for peak areas. The method was applied to demonstrate that Saccharomyces cerevisiae was able to decrease the concentrations of vanillin, coniferyl aldehyde, syringaldehyde, acetoguaiacone and cinnamic acid during the cultivation, whereas the concentrations of phenols increased.
"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::6c232359e3b3165574cb88f0554d9264","instance":[{"accessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1016/j.chroma.2013.01.004"}],"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2013-02-22"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/91f411d0-d8f8-4bf1-9072-345303cc776c"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1646505708387,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2022-01-29T08:16:51Z","harvestDate":"2022-02-28T12:29:51.291Z","identifier":"oai:cris.vtt.fi:publications/91f411d0-d8f8-4bf1-9072-345303cc776c","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/91f411d0-d8f8-4bf1-9072-345303cc776c","50|355e65625b88::6c232359e3b3165574cb88f0554d9264"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Turkia , H , Sirén , H , Penttilä , M & Pitkänen , J P 2013 , ' Capillary electrophoresis for the monitoring of phenolic compounds in bioprocesses ' , Journal of Chromatography A , vol. 1278 , pp. 175-180 . 
https://doi.org/10.1016/j.chroma.2013.01.004"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Bioprocess monitoring"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Capillary electrophoresis"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Phenolic compounds"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Capillary electrophoresis for the monitoring of phenolic compounds in bioprocesses"}]} {"author":[{"fullname":"Veijalainen, Jari","name":"Jari","pid":[],"rank":1,"surname":"Veijalainen"},{"fullname":"Wolski, Antoni","name":"Antoni","pid":[],"rank":2,"surname":"Wolski"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[{"dataInfo": null,"id": "dariah"}],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1991-01-01"},"dateofcollection":"2022-02-28T12:33:57.005Z","dateoftransformation":"2022-02-28T16:33:35.101Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::74009c567c81b4aa55c813db658734df","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information 
System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1991-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0002","classname":"Book","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/bb17c77a-f574-4921-a5cb-32dc1f283fa3"]},{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1991-01-01"},"distributionlocation":"","hostedby":{"key":"10|eurocrisdris::fe4903425d9040f680d8610d9079ea14","value":"VTT Research Information System"},"instancetype":{"classid":"0002","classname":"Book","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/bb17c77a-f574-4921-a5cb-32dc1f283fa3"]}, {"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1991-01-01"},"distributionlocation":"","hostedby":{"key":"10|opendoar____::f0dd4a99fba6075a9494772b58f95280","value":"VTT Research Information 
System"},"instancetype":{"classid":"0002","classname":"Book","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/bb17c77a-f574-4921-a5cb-32dc1f283fa3"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1646505716994,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2020-12-21T07:05:54Z","harvestDate":"2022-02-28T12:33:57.005Z","identifier":"oai:cris.vtt.fi:publications/bb17c77a-f574-4921-a5cb-32dc1f283fa3","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["50|355e65625b88::74009c567c81b4aa55c813db658734df","oai:cris.vtt.fi:publications/bb17c77a-f574-4921-a5cb-32dc1f283fa3"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"VTT Technical Research Centre of Finland"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Veijalainen , J & Wolski , A 1991 , Prepare and commit certification for decentralized transaction management in rigorous multidatabases : Research Report No. J-1 . VTT Technical Research Centre of Finland ."}],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Prepare and commit certification for decentralized transaction management in rigorous multidatabases:Research Report No. 
J-1"}]} {"author":[{"fullname":"Hanhijärvi, Antti","name":"Antti","pid":[],"rank":1,"surname":"Hanhijärvi"},{"fullname":"Hukka, A.","name":"A.","pid":[],"rank":2,"surname":"Hukka"},{"fullname":"Paajanen, T.","name":"T.","pid":[],"rank":3,"surname":"Paajanen"},{"fullname":"Pulkkinen, P.","name":"P.","pid":[],"rank":4,"surname":"Pulkkinen"},{"fullname":"Sundman, S.","name":"S.","pid":[],"rank":5,"surname":"Sundman"}],"bestaccessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2003-01-01"},"dateofcollection":"2022-02-28T12:32:33.974Z","dateoftransformation":"2022-02-28T17:38:24.191Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"The paper presents experimental results of jet drying tests of birch (Betula pendula) and spruce (Picea abies) veneers at temperatures of 140°C and 190°C. Results include drying rates for 1.5 mm thick birch veneers as well as 1.5 mm and 2.6 mm thick spruce veneers of both heartwood and sapwood. Based on the test results, material parameter values for a simplified drying model are assessed. The model is based on the use of an effective diffusion coefficient and an effective surface emission coefficient, which values are calibrated to fit to the experimental results. 
It is observed, that separate model parameter sets are needed for the two different species but also for occurrence of heartwood or sapwood (spruce) and different thickness values of veneers."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|dedup_wf_001::08d6f2001319c86d0e69b0f83ad75df2","instance":[{"accessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1007/s00107-003-0379-4"}],"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2003-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/8cc91100-904f-43c5-bb3d-1cc3e0f4a4b5"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1646505760180,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2022-01-31T21:18:51Z","harvestDate":"2022-02-28T12:32:33.974Z","identifier":"oai:cris.vtt.fi:publications/8cc91100-904f-43c5-bb3d-1cc3e0f4a4b5","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["50|355e65625b88::9cb10895b4a92b0215b85acb2c3268b9","oai:cris.vtt.fi:publications/8cc91100-904f-43c5-bb3d-1cc3e0f4a4b5"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Hanhijärvi , A , Hukka , A , Paajanen , T , Pulkkinen , P & Sundman , S 2003 , ' Experimental investigation of jet drying of birch and spruce veneers and modelling with a simplified approach ' , Holz als Roh- und Werkstoff , vol. 61 , no. 2 , pp. 83-88 . 
https://doi.org/10.1007/s00107-003-0379-4"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"jet drying"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"drying"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"veneers"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"birch"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"spruce"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"heartwood"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"sapwood"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Experimental investigation of jet drying of birch and spruce veneers and modelling with a simplified approach"}]} 
-{"author":[{"fullname":"Vainonen-Ahlgren, Elizaveta","name":"Elizaveta","pid":[],"rank":1,"surname":"Vainonen-Ahlgren"},{"fullname":"Likonen, Jari","name":"Jari","pid":[],"rank":2,"surname":"Likonen"},{"fullname":"Renvall,","pid":[],"rank":3},{"fullname":"Rohde, V.","name":"V.","pid":[],"rank":4,"surname":"Rohde"},{"fullname":"Mayer, M.","name":"M.","pid":[],"rank":5,"surname":"Mayer"}],"bestaccessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2007-01-01"},"dateofcollection":"2022-02-28T12:32:50.667Z","dateoftransformation":"2022-02-28T17:49:49.964Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"To investigate material transport in scrape-off layer plasma and long term deposition in divertor, 13CH4 was puffed at the end of 2004 and 2005 experimental campaigns into ASDEX Upgrade from the outer mid-plane.
Ex situ analyses of the tiles were performed by secondary ion mass spectrometry. The peaks of 13C were detected below the bottom inner strike point and at the horizontal tile at the outer lower divertor. It was detected ∼21% of the total puffed 13C amount.
The deposition rate for carbon by plasma was also calculated in long term experiment. It was obtained to be 22 × 10−3 and 8.7 × 10−3 g/s for the upper (campaign 2004) and lower (campaign 2003) divertors, respectively."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|dedup_wf_001::06e51d2bf295531b2d2e7a1b55500783","instance":[{"accessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1016/j.jnucmat.2007.01.026"}],"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2007-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0001","classname":"Patent","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/2472b21e-1fdc-4121-946e-e9c8fae6d02d"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1646505766149,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2022-02-01T02:35:05Z","harvestDate":"2022-02-28T12:32:50.667Z","identifier":"oai:cris.vtt.fi:publications/2472b21e-1fdc-4121-946e-e9c8fae6d02d","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["50|355e65625b88::a29614444f5030f11e75c6c27264d272","oai:cris.vtt.fi:publications/2472b21e-1fdc-4121-946e-e9c8fae6d02d"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Vainonen-Ahlgren , E , Likonen , J , Renvall , Rohde , V & Mayer , M 2007 , ' Migration of 13C and deposition at ASDEX Upgrade ' , Journal of Nuclear Materials , vol. 363-365 , pp. 270-275 . 
https://doi.org/10.1016/j.jnucmat.2007.01.026"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"ASDEX upgrade"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"divertor"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"carbon based materials"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"erosion"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"deposition"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"ITER"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"JET"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"plasma"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction"
:{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"fusion energy"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Migration of 13C and deposition at ASDEX Upgrade"}]} +{"author":[{"fullname":"Vainonen-Ahlgren, Elizaveta","name":"Elizaveta","pid":[],"rank":1,"surname":"Vainonen-Ahlgren"},{"fullname":"Likonen, Jari","name":"Jari","pid":[],"rank":2,"surname":"Likonen"},{"fullname":"Renvall,","pid":[],"rank":3},{"fullname":"Rohde, V.","name":"V.","pid":[],"rank":4,"surname":"Rohde"},{"fullname":"Mayer, M.","name":"M.","pid":[],"rank":5,"surname":"Mayer"}],"bestaccessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2007-01-01"},"dateofcollection":"2022-02-28T12:32:50.667Z","dateoftransformation":"2022-02-28T17:49:49.964Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"To investigate material transport in scrape-off layer plasma and long term deposition in divertor, 13CH4 was puffed at the end of 2004 and 2005 experimental campaigns into ASDEX Upgrade from the outer mid-plane.
Ex situ analyses of the tiles were performed by secondary ion mass spectrometry.
The peaks of 13C were detected below the bottom inner strike point and at the horizontal tile at the outer lower divertor. It was detected ∼21% of the total puffed 13C amount.
The deposition rate for carbon by plasma was also calculated in long term experiment. It was obtained to be 22 × 10−3 and 8.7 × 10−3 g/s for the upper (campaign 2004) and lower (campaign 2003) divertors, respectively."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|dedup_wf_001::06e51d2bf295531b2d2e7a1b55500783","instance":[{"accessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1016/j.jnucmat.2007.01.026"}],"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2007-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0001","classname":"Patent","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/2472b21e-1fdc-4121-946e-e9c8fae6d02d"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1646505766149,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2022-02-01T02:35:05Z","harvestDate":"2022-02-28T12:32:50.667Z","identifier":"oai:cris.vtt.fi:publications/2472b21e-1fdc-4121-946e-e9c8fae6d02d","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["50|355e65625b88::a29614444f5030f11e75c6c27264d272","oai:cris.vtt.fi:publications/2472b21e-1fdc-4121-946e-e9c8fae6d02d"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Vainonen-Ahlgren , E , Likonen , J , Renvall , Rohde , V & Mayer , M 2007 , ' Migration of 13C and deposition at ASDEX Upgrade ' , Journal of Nuclear Materials , vol. 363-365 , pp. 270-275 . 
https://doi.org/10.1016/j.jnucmat.2007.01.026"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"ASDEX upgrade"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"divertor"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"carbon based materials"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"erosion"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"deposition"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"ITER"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"JET"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"plasma"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction"
:{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"fusion energy"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Migration of 13C and deposition at ASDEX Upgrade"}]} {"author":[{"fullname":"Aalto, Timo","name":"Timo","pid":[],"rank":1,"surname":"Aalto"},{"fullname":"Harjanne, Mikko","name":"Mikko","pid":[],"rank":2,"surname":"Harjanne"},{"fullname":"Kapulainen, Markku","name":"Markku","pid":[],"rank":3,"surname":"Kapulainen"}],"bestaccessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[{"dataInfo": null,"id": "beopen"}],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2003-01-01"},"dateofcollection":"2022-02-28T12:32:37.581Z","dateoftransformation":"2022-02-28T19:39:18.717Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"A novel method has been developed for measuring the rotational angle of a fiber's or a waveguide's polarization axis with respect to a reference angle. The reference angle is the polarization axis of the measuring device. The method also gives the true polarization extinction ratio of the measured fiber or waveguide. The method is suitable for the characterization and rotational alignment of polarization-maintaining waveguides and fibers. In particular, the method can be used to rotationally align the fiber-waveguide interconnections during waveguide characterization. The measuring device is either a linear polarizer or a polarization splitter that is accurately rotated with respect to the device under test. 
According to the experiments with a polarization-maintaining fiber, the method is very easy and inexpensive to implement, and the angular accuracy can be better than 0.2 deg."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|openorgs____::64badd35233ba2cd4946368ef2f4cf57","instance":[{"accessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1117/1.1600730"}],"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2003-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/8cd538fb-6484-4655-81dd-47348d358fd4"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1646505829230,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2022-01-31T21:47:37Z","harvestDate":"2022-02-28T12:32:37.581Z","identifier":"oai:cris.vtt.fi:publications/8cd538fb-6484-4655-81dd-47348d358fd4","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["50|355e65625b88::df0143af011fd82af8ac2d07b03ee8cd","oai:cris.vtt.fi:publications/8cd538fb-6484-4655-81dd-47348d358fd4"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Aalto , T , Harjanne , M & Kapulainen , M 2003 , ' Method for the rotational alignment of polarization-maintaining optical fibers and waveguides ' , Optical Engineering , vol. 42 , no. 10 , pp. 2861-2867 . 
https://doi.org/10.1117/1.1600730"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"optical waveguide"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"polarization-maintaining fiber"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"characterization"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"fiber-waveguide coupling"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"polarization"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"polarization axis"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"polarizer"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"polarization 
splitter"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Method for the rotational alignment of polarization-maintaining optical fibers and waveguides"}]} {"author":[{"fullname":"Aalto, Timo","name":"Timo","pid":[],"rank":1,"surname":"Aalto"},{"fullname":"Harjanne, Mikko","name":"Mikko","pid":[],"rank":2,"surname":"Harjanne"},{"fullname":"Kapulainen, Markku","name":"Markku","pid":[],"rank":3,"surname":"Kapulainen"}],"bestaccessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[{"dataInfo": null,"id": "beopen"}],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2003-01-01"},"dateofcollection":"2022-02-28T12:32:37.581Z","dateoftransformation":"2022-02-28T19:39:18.717Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"A novel method has been developed for measuring the rotational angle of a fiber's or a waveguide's polarization axis with respect to a reference angle. The reference angle is the polarization axis of the measuring device. The method also gives the true polarization extinction ratio of the measured fiber or waveguide. The method is suitable for the characterization and rotational alignment of polarization-maintaining waveguides and fibers. In particular, the method can be used to rotationally align the fiber-waveguide interconnections during waveguide characterization. The measuring device is either a linear polarizer or a polarization splitter that is accurately rotated with respect to the device under test. 
According to the experiments with a polarization-maintaining fiber, the method is very easy and inexpensive to implement, and the angular accuracy can be better than 0.2 deg."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|openorgs____::64badd35233ba2cd4946368ef2f4cf57","instance":[{"accessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1117/1.1600730"}],"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2003-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/8cd538fb-6484-4655-81dd-47348d358fd4"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1646505829230,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2022-01-31T21:47:37Z","harvestDate":"2022-02-28T12:32:37.581Z","identifier":"oai:cris.vtt.fi:publications/8cd538fb-6484-4655-81dd-47348d358fd4","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["50|355e65625b88::df0143af011fd82af8ac2d07b03ee8cd","oai:cris.vtt.fi:publications/8cd538fb-6484-4655-81dd-47348d358fd4"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Aalto , T , Harjanne , M & Kapulainen , M 2003 , ' Method for the rotational alignment of polarization-maintaining optical fibers and waveguides ' , Optical Engineering , vol. 42 , no. 10 , pp. 2861-2867 . 
https://doi.org/10.1117/1.1600730"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"optical waveguide"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"polarization-maintaining fiber"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"characterization"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"fiber-waveguide coupling"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"polarization"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"polarization axis"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"polarizer"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"polarization 
splitter"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Method for the rotational alignment of polarization-maintaining optical fibers and waveguides"}]} {"author":[{"fullname":"Penttilä, Raimo","name":"Raimo","pid":[],"rank":1,"surname":"Penttilä"},{"fullname":"Vanttaja, Ilkka","name":"Ilkka","pid":[],"rank":2,"surname":"Vanttaja"},{"fullname":"Haapamäki, Petteri","name":"Petteri","pid":[],"rank":3,"surname":"Haapamäki"},{"fullname":"Kujanpää, Veli","name":"Veli","pid":[],"rank":4,"surname":"Kujanpää"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|opendoar____::f0dd4a99fba6075a9494772b58f95280","value":"VTT Research Information System"}],"context": [{"dataInfo":null, "id":"dh-ch"}],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1994-01-01"},"dateofcollection":"2022-02-28T12:35:26.769Z","dateoftransformation":"2022-02-28T19:54:10.494Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::e7d48a470b13bda61f7ebe3513e20cb6","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1994-01-01"},"distributionlocation":"","hostedby":{"key":"10|eurocrisdris::9ae43d14471c4b33661fedda6f06b539","value":"VTT Research Information 
System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/ddbd28ea-5fbf-43e1-896f-e69856870c26"]}],"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1646505838552,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2019-08-08T07:09:42Z","harvestDate":"2022-02-28T12:35:26.769Z","identifier":"oai:cris.vtt.fi:publications/ddbd28ea-5fbf-43e1-896f-e69856870c26","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/ddbd28ea-5fbf-43e1-896f-e69856870c26","50|355e65625b88::e7d48a470b13bda61f7ebe3513e20cb6"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Penttilä , R , Vanttaja , I , Haapamäki , P & Kujanpää , V 1994 , ' Liimauksen ja puristusliittämisen yhdistämisellä lisää lujuutta, jäykkyyttä ja tiiveyttä ' , Ohutlevyuutiset , no. 2 , pp. 17-19 ."}],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Liimauksen ja puristusliittämisen yhdistämisellä lisää lujuutta, jäykkyyttä ja tiiveyttä"}]} \ No newline at end of file From e4b814b3f1bdb3ab52a439aff82aa56359a80cb7 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 6 Dec 2024 13:58:39 +0100 Subject: [PATCH 54/73] code formatting --- .../dhp/collection/CollectorWorker.java | 2 +- .../collection/crossref/Crossref2Oaf.scala | 13 ++-- .../crossref/CrossrefMappingTest.scala | 18 +++-- .../PrepareResultCommunitySetStep1.java | 76 ++++++++++--------- .../PrepareResultCommunitySetStep2.java | 2 +- .../ResultToCommunityJobTest.java | 70 ++++++++--------- 6 files changed, 97 insertions(+), 84 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorWorker.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorWorker.java index 4c6d0653e..67966d523 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorWorker.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorWorker.java @@ -7,7 +7,6 @@ import java.io.IOException; import java.util.Optional; import java.util.concurrent.atomic.AtomicInteger; -import eu.dnetlib.dhp.collection.plugin.zenodo.CollectZenodoDumpCollectorPlugin; import org.apache.hadoop.fs.FileSystem; import 
org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IntWritable; @@ -29,6 +28,7 @@ import eu.dnetlib.dhp.collection.plugin.mongodb.MongoDbDumpCollectorPlugin; import eu.dnetlib.dhp.collection.plugin.oai.OaiCollectorPlugin; import eu.dnetlib.dhp.collection.plugin.osf.OsfPreprintsCollectorPlugin; import eu.dnetlib.dhp.collection.plugin.rest.RestCollectorPlugin; +import eu.dnetlib.dhp.collection.plugin.zenodo.CollectZenodoDumpCollectorPlugin; import eu.dnetlib.dhp.common.aggregation.AggregatorReport; import eu.dnetlib.dhp.common.collection.CollectorException; import eu.dnetlib.dhp.common.collection.HttpClientParams; diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/collection/crossref/Crossref2Oaf.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/collection/crossref/Crossref2Oaf.scala index c72b366a0..ea2177497 100644 --- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/collection/crossref/Crossref2Oaf.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/collection/crossref/Crossref2Oaf.scala @@ -503,7 +503,6 @@ case object Crossref2Oaf { ) } - if (doi.startsWith("10.3410") || doi.startsWith("10.12703")) instance.setHostedby( OafMapperUtils.keyValue(OafMapperUtils.createOpenaireId(10, "openaire____::H1Connect", true), "H1Connect") @@ -556,14 +555,18 @@ case object Crossref2Oaf { result } - - def generateIdentifier(oaf: Result, doi: String): String = { val id = DHPUtils.md5(doi.toLowerCase) s"50|doiboost____|$id" } - private def generateAuthor(given: String, family: String, orcid: String, index: Int, affiliation: Option[List[mappingAffiliation]]): Author = { + private def generateAuthor( + given: String, + family: String, + orcid: String, + index: Int, + affiliation: Option[List[mappingAffiliation]] + ): Author = { val a = new Author a.setName(given) a.setSurname(family) @@ -700,7 +703,6 @@ case object Crossref2Oaf { if (objectType == null) return resultList - // If the item has a relations is-review-of, then we force it to a peer-review val is_review = json \ "relation" \ "is-review-of" \ "id" var force_to_review = false @@ -713,7 +715,6 @@ case object Crossref2Oaf { if (typology == null) return List() - val result = generateItemFromType(typology._2) if (result == null) return List() diff --git a/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/collection/crossref/CrossrefMappingTest.scala b/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/collection/crossref/CrossrefMappingTest.scala index 12ca14ba1..ebe247d8a 100644 --- a/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/collection/crossref/CrossrefMappingTest.scala +++ b/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/collection/crossref/CrossrefMappingTest.scala @@ -28,17 +28,21 @@ class CrossrefMappingTest extends AbstractVocabularyTest { val input = IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/collection/crossref/issn_pub.json"), "utf-8") - Crossref2Oaf.convert(input, vocabularies, TransformationType.All).foreach(record => { - Assertions.assertNotNull(record) - }) + Crossref2Oaf + .convert(input, vocabularies, TransformationType.All) + .foreach(record => { + Assertions.assertNotNull(record) + }) } - @Test def mappingAffiliation(): Unit = { val input = - IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/collection/crossref/affiliationTest.json"), "utf-8") + IOUtils.toString( + getClass.getResourceAsStream("/eu/dnetlib/dhp/collection/crossref/affiliationTest.json"), + "utf-8" + ) val data = 
Crossref2Oaf.convert(input, vocabularies, TransformationType.OnlyResult) data.foreach(record => { Assertions.assertNotNull(record) @@ -46,10 +50,10 @@ class CrossrefMappingTest extends AbstractVocabularyTest { val publication = record.asInstanceOf[Publication] publication.getAuthor.asScala.foreach(author => { Assertions.assertNotNull(author.getRawAffiliationString) - Assertions.assertTrue(author.getRawAffiliationString.size()>0) + Assertions.assertTrue(author.getRawAffiliationString.size() > 0) - }) }) + }) println(mapper.writerWithDefaultPrettyPrinter().writeValueAsString(data.head)) } } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep1.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep1.java index 764390442..ecb7cc827 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep1.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep1.java @@ -1,16 +1,16 @@ package eu.dnetlib.dhp.resulttocommunityfromsemrel; +import static java.lang.String.join; + import static eu.dnetlib.dhp.PropagationConstant.*; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; -import static java.lang.String.join; import java.io.IOException; import java.util.Arrays; import java.util.Collections; import java.util.List; -import eu.dnetlib.dhp.schema.common.ModelConstants; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.sql.*; @@ -22,6 +22,7 @@ import com.google.gson.Gson; import eu.dnetlib.dhp.api.Utils; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.resulttocommunityfromorganization.ResultCommunityList; +import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.Relation; import eu.dnetlib.dhp.schema.oaf.Result; import eu.dnetlib.dhp.utils.ISLookupClientFactory; @@ -37,8 +38,7 @@ public class PrepareResultCommunitySetStep1 { * relation */ // TODO - private static final String RESULT_CONTEXT_QUERY_TEMPLATE = - "select target resultId, community_context " + private static final String RESULT_CONTEXT_QUERY_TEMPLATE = "select target resultId, community_context " + "from (select id, collect_set(co.id) community_context " + " from result " + " lateral view explode (context) c as co " @@ -60,26 +60,26 @@ public class PrepareResultCommunitySetStep1 { + "where length(co) > 0 " + "group by resultId"; - private static final String RESULT_CONTEXT_QUERY_TEMPLATE_IS_RELATED_TO = - "select target as resultId, community_context " + - "from resultWithContext rwc " + - "join relatedToRelations r " + - "join patents p " + - "on rwc.id = r.source and r.target = p.id"; + private static final String RESULT_CONTEXT_QUERY_TEMPLATE_IS_RELATED_TO = "select target as resultId, community_context " + + + "from resultWithContext rwc " + + "join relatedToRelations r " + + "join patents p " + + "on rwc.id = r.source and r.target = p.id"; private static final String RESULT_WITH_CONTEXT = "select id, collect_set(co.id) community_context \n" + - " from result " + - " lateral view explode (context) c as co " + - " where lower(co.id) IN %s" + - " group by id"; + " from result " + + " lateral view explode (context) c as co " + + " where lower(co.id) IN %s" + + " group by id"; private static final String RESULT_PATENT = "select id 
" + - " from result " + - " where array_contains(instance.instancetype.classname, 'Patent')"; + " from result " + + " where array_contains(instance.instancetype.classname, 'Patent')"; private static final String IS_RELATED_TO_RELATIONS = "select source, target " + - " from relation " + - " where lower(relClass) = 'isrelatedto' and datainfo.deletedbyinference = false"; + " from relation " + + " where lower(relClass) = 'isrelatedto' and datainfo.deletedbyinference = false"; public static void main(String[] args) throws Exception { String jsonConfiguration = IOUtils @@ -107,17 +107,25 @@ public class PrepareResultCommunitySetStep1 { SparkConf conf = new SparkConf(); conf.set("hive.metastore.uris", parser.get("hive_metastore_uris")); - final String allowedsemrel ="(" + join(",", - Arrays.asList(parser.get("allowedsemrels").split(";")).stream().map(value -> "'" + value.toLowerCase() + "'") - .toArray(String[]::new)) + ")"; + final String allowedsemrel = "(" + join( + ",", + Arrays + .asList(parser.get("allowedsemrels").split(";")) + .stream() + .map(value -> "'" + value.toLowerCase() + "'") + .toArray(String[]::new)) + + ")"; log.info("allowedSemRel: {}", allowedsemrel); final String baseURL = parser.get("baseURL"); log.info("baseURL: {}", baseURL); - final String communityIdList = "(" + join(",", getCommunityList(baseURL).stream() + final String communityIdList = "(" + join( + ",", getCommunityList(baseURL) + .stream() .map(value -> "'" + value.toLowerCase() + "'") - .toArray(String[]::new)) + ")"; + .toArray(String[]::new)) + + ")"; final String resultType = resultClassName.substring(resultClassName.lastIndexOf(".") + 1).toLowerCase(); log.info("resultType: {}", resultType); @@ -161,18 +169,17 @@ public class PrepareResultCommunitySetStep1 { relation.createOrReplaceTempView("relation"); Dataset result = readPath(spark, inputResultPath, resultClazz) - .where("datainfo.deletedbyinference != true AND datainfo.invisible != true"); + .where("datainfo.deletedbyinference != true AND datainfo.invisible != true"); result.createOrReplaceTempView("result"); final String outputResultPath = outputPath + "/" + resultType; log.info("writing output results to: {}", outputResultPath); - String resultContextQuery = String .format( RESULT_CONTEXT_QUERY_TEMPLATE, - "AND lower(co.id) IN " + communityIdList, - "AND lower(relClass) IN " + allowedsemrel); + "AND lower(co.id) IN " + communityIdList, + "AND lower(relClass) IN " + allowedsemrel); Dataset result_context = spark.sql(resultContextQuery); Dataset rwc = spark.sql(String.format(RESULT_WITH_CONTEXT, communityIdList)); @@ -183,18 +190,17 @@ public class PrepareResultCommunitySetStep1 { patents.createOrReplaceTempView("patents"); relatedToRelations.createOrReplaceTempView("relatedTorelations"); - - result_context = result_context.unionAll( spark.sql(RESULT_CONTEXT_QUERY_TEMPLATE_IS_RELATED_TO)); + result_context = result_context.unionAll(spark.sql(RESULT_CONTEXT_QUERY_TEMPLATE_IS_RELATED_TO)); result_context.createOrReplaceTempView("result_context"); spark - .sql(RESULT_COMMUNITY_LIST_QUERY) - .as(Encoders.bean(ResultCommunityList.class)) - .write() - .option("compression", "gzip") - .mode(SaveMode.Append) - .json(outputResultPath); + .sql(RESULT_COMMUNITY_LIST_QUERY) + .as(Encoders.bean(ResultCommunityList.class)) + .write() + .option("compression", "gzip") + .mode(SaveMode.Append) + .json(outputResultPath); } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep2.java 
b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep2.java index 9bebc36e5..9801b1bf6 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep2.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep2.java @@ -77,7 +77,7 @@ public class PrepareResultCommunitySetStep2 { if (b == null) { return a; } - Set community_set = new HashSet<>(a.getCommunityList()); + Set community_set = new HashSet<>(a.getCommunityList()); community_set.addAll(b.getCommunityList()); a.setCommunityList(new ArrayList<>(community_set)); return a; diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/ResultToCommunityJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/ResultToCommunityJobTest.java index c1fcff4d9..2b52c91de 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/ResultToCommunityJobTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/ResultToCommunityJobTest.java @@ -10,7 +10,6 @@ import java.util.ArrayList; import java.util.List; import java.util.stream.Collectors; -import eu.dnetlib.dhp.resulttocommunityfromorganization.ResultCommunityList; import org.apache.commons.io.FileUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; @@ -27,6 +26,7 @@ import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; +import eu.dnetlib.dhp.resulttocommunityfromorganization.ResultCommunityList; import eu.dnetlib.dhp.schema.oaf.Dataset; import scala.collection.Seq; @@ -279,53 +279,55 @@ public class ResultToCommunityJobTest { @Test public void prepareStep1Test() throws Exception { /* - - - final String allowedsemrel = join(",", Arrays.stream(parser.get("allowedsemrels").split(";")) - .map(value -> "'" + value.toLowerCase() + "'") - .toArray(String[]::new)); - - log.info("allowedSemRel: {}", new Gson().toJson(allowedsemrel)); - - final String baseURL = parser.get("baseURL"); - log.info("baseURL: {}", baseURL); + * final String allowedsemrel = join(",", Arrays.stream(parser.get("allowedsemrels").split(";")) .map(value -> + * "'" + value.toLowerCase() + "'") .toArray(String[]::new)); log.info("allowedSemRel: {}", new + * Gson().toJson(allowedsemrel)); final String baseURL = parser.get("baseURL"); log.info("baseURL: {}", + * baseURL); */ PrepareResultCommunitySetStep1 - .main( - new String[] { - "-isSparkSessionManaged", Boolean.FALSE.toString(), - "-sourcePath", getClass() - .getResource("/eu/dnetlib/dhp/resulttocommunityfromsemrel/graph") - .getPath(), - "-hive_metastore_uris", "", - "-resultTableName", "eu.dnetlib.dhp.schema.oaf.Publication", - "-outputPath", workingDir.toString() + "/preparedInfo", - "-allowedsemrels","issupplementto;issupplementedby", - "-baseURL","https://dev-openaire.d4science.org/openaire/community/" - }); + .main( + new String[] { + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-sourcePath", getClass() + .getResource("/eu/dnetlib/dhp/resulttocommunityfromsemrel/graph") + .getPath(), + "-hive_metastore_uris", "", + "-resultTableName", "eu.dnetlib.dhp.schema.oaf.Publication", + "-outputPath", workingDir.toString() + "/preparedInfo", + "-allowedsemrels", "issupplementto;issupplementedby", + "-baseURL", 
"https://dev-openaire.d4science.org/openaire/community/" + }); - - org.apache.spark.sql.Dataset resultCommunityList = spark.read().schema(Encoders.bean(ResultCommunityList.class).schema()) - .json(workingDir.toString() + "/preparedInfo/publication") - .as(Encoders.bean(ResultCommunityList.class)); + org.apache.spark.sql.Dataset resultCommunityList = spark + .read() + .schema(Encoders.bean(ResultCommunityList.class).schema()) + .json(workingDir.toString() + "/preparedInfo/publication") + .as(Encoders.bean(ResultCommunityList.class)); Assertions.assertEquals(2, resultCommunityList.count()); - Assertions.assertEquals(1,resultCommunityList.filter("resultId = '50|dedup_wf_001::06e51d2bf295531b2d2e7a1b55500783'").count()); - Assertions.assertEquals(1,resultCommunityList.filter("resultId = '50|pending_org_::82f63b2d21ae88596b9d8991780e9888'").count()); + Assertions + .assertEquals( + 1, + resultCommunityList.filter("resultId = '50|dedup_wf_001::06e51d2bf295531b2d2e7a1b55500783'").count()); + Assertions + .assertEquals( + 1, + resultCommunityList.filter("resultId = '50|pending_org_::82f63b2d21ae88596b9d8991780e9888'").count()); ArrayList communities = resultCommunityList - .filter("resultId = '50|dedup_wf_001::06e51d2bf295531b2d2e7a1b55500783'") - .first().getCommunityList(); + .filter("resultId = '50|dedup_wf_001::06e51d2bf295531b2d2e7a1b55500783'") + .first() + .getCommunityList(); Assertions.assertEquals(2, communities.size()); Assertions.assertTrue(communities.stream().anyMatch(cid -> "beopen".equals(cid))); Assertions.assertTrue(communities.stream().anyMatch(cid -> "dh-ch".equals(cid))); communities = resultCommunityList - .filter("resultId = '50|pending_org_::82f63b2d21ae88596b9d8991780e9888'") - .first().getCommunityList(); + .filter("resultId = '50|pending_org_::82f63b2d21ae88596b9d8991780e9888'") + .first() + .getCommunityList(); Assertions.assertEquals(1, communities.size()); Assertions.assertEquals("dh-ch", communities.get(0)); } - } From 0d050061146fc8fb7dfb1f619601719b3892da82 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Fri, 6 Dec 2024 14:23:47 +0100 Subject: [PATCH 55/73] code formatted --- .../java/eu/dnetlib/dhp/collection/orcid/ORCIDExtractor.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/orcid/ORCIDExtractor.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/orcid/ORCIDExtractor.java index 8172456bb..1adad104e 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/orcid/ORCIDExtractor.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/orcid/ORCIDExtractor.java @@ -160,7 +160,7 @@ public class ORCIDExtractor extends Thread { } } finally { for (SequenceFile.Writer k : fileMap.values()) { - log.info("Thread {}: Completed processed {} items", id, extractedItem); + log.info("Thread {}: Completed processed {} items", id, extractedItem); k.hflush(); k.close(); } From 101d9e830dc914bac5969317e78a12a3d5957045 Mon Sep 17 00:00:00 2001 From: Giambattista Bloisi Date: Wed, 11 Dec 2024 15:59:13 +0100 Subject: [PATCH 56/73] JsonListMatch do not lower the extracted strings Fix test configurations and assertions --- .../eu/dnetlib/pace/tree/JsonListMatch.java | 2 +- .../dhp/oa/dedup/DecisionTreeTest.java | 8 +++--- .../dnetlib/dhp/oa/dedup/SparkDedupTest.java | 27 ++++++++++--------- .../dhp/oa/dedup/SparkOpenorgsDedupTest.java | 4 +-- .../oa/dedup/SparkPublicationRootsTest.java | 2 +- 
.../oa/dedup/SparkPublicationRootsTest2.java | 9 ++++--- .../dnetlib/dhp/oa/dedup/SparkStatsTest.java | 2 +- .../dhp/oa/dedup/jpath/JsonPathTest.java | 6 ++--- .../dnetlib/dhp/dedup/conf/org.curr.conf.json | 24 +++++++---------- 9 files changed, 39 insertions(+), 45 deletions(-) diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/JsonListMatch.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/JsonListMatch.java index d9558df90..2f845b067 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/JsonListMatch.java +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/JsonListMatch.java @@ -86,7 +86,7 @@ public class JsonListMatch extends AbstractListComparator { // for each path in the param list for (String key : params.keySet().stream().filter(k -> k.contains("jpath")).collect(Collectors.toList())) { String path = params.get(key); - String value = MapDocumentUtil.getJPathString(path, documentContext).toLowerCase(); + String value = MapDocumentUtil.getJPathString(path, documentContext); if (value == null || value.isEmpty()) value = ""; st.append(value); diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/DecisionTreeTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/DecisionTreeTest.java index 6acc65e05..5094317cb 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/DecisionTreeTest.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/DecisionTreeTest.java @@ -21,17 +21,15 @@ class DecisionTreeTest { void testJPath() throws IOException { DedupConfig conf = DedupConfig - .load(IOUtils.toString(getClass().getResourceAsStream("dedup_conf_organization.json"))); + .load(IOUtils.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/dedup/jpath/dedup_conf_organization.json"))); - final String org = IOUtils.toString(getClass().getResourceAsStream("organization.json")); + final String org = IOUtils.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/dedup/jpath/organization.json")); Row row = SparkModel.apply(conf).rowFromJson(org); System.out.println("row = " + row); Assertions.assertNotNull(row); Assertions.assertTrue(StringUtils.isNotBlank(row.getAs("identifier"))); - - System.out.println("row = " + row.getAs("countrytitle")); } @Test @@ -44,7 +42,7 @@ class DecisionTreeTest { .getResourceAsStream( "/eu/dnetlib/dhp/dedup/conf/org.curr.conf.json"))); - final String org = IOUtils.toString(getClass().getResourceAsStream("organization_example1.json")); + final String org = IOUtils.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/dedup/jpath/organization_example1.json")); Row row = SparkModel.apply(conf).rowFromJson(org); // to check that the same parsing returns the same row diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java index 3bfd861f8..d2d5af501 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java @@ -190,7 +190,7 @@ public class SparkDedupTest implements Serializable { System.out.println("orp_simrel = " + orp_simrel); if (CHECK_CARDINALITIES) { - assertEquals(742, orgs_simrel); + assertEquals(720, orgs_simrel); assertEquals(566, pubs_simrel); assertEquals(113, sw_simrel); assertEquals(148, ds_simrel); @@ -251,7 +251,7 @@ public class 
SparkDedupTest implements Serializable { // entities simrels supposed to be equal to the number of previous step (no rels in whitelist) if (CHECK_CARDINALITIES) { - assertEquals(742, orgs_simrel); + assertEquals(720, orgs_simrel); assertEquals(566, pubs_simrel); assertEquals(148, ds_simrel); assertEquals(280, orp_simrel); @@ -440,25 +440,26 @@ public class SparkDedupTest implements Serializable { .count(); final List merges = pubs - .filter("source == '50|arXiv_dedup_::c93aeb433eb90ed7a86e29be00791b7c'") + .filter("source == '50|doi_dedup___::d5021b53204e4fdeab6ff5d5bc468032'")// and relClass = '"+ModelConstants.MERGES+"'") .collectAsList(); - assertEquals(1, merges.size()); + assertEquals(4, merges.size()); Set dups = Sets .newHashSet( "50|doi_________::3b1d0d8e8f930826665df9d6b82fbb73", "50|doi_________::d5021b53204e4fdeab6ff5d5bc468032", - "50|arXiv_______::c93aeb433eb90ed7a86e29be00791b7c"); + "50|arXiv_______::c93aeb433eb90ed7a86e29be00791b7c", + "50|arXiv_dedup_::c93aeb433eb90ed7a86e29be00791b7c"); merges.forEach(r -> { assertEquals(ModelConstants.RESULT_RESULT, r.getRelType()); assertEquals(ModelConstants.DEDUP, r.getSubRelType()); - assertEquals(ModelConstants.IS_MERGED_IN, r.getRelClass()); + assertEquals(ModelConstants.MERGES, r.getRelClass()); assertTrue(dups.contains(r.getTarget())); }); final List mergedIn = pubs - .filter("target == '50|arXiv_dedup_::c93aeb433eb90ed7a86e29be00791b7c'") + .filter("target == '50|doi_dedup___::d5021b53204e4fdeab6ff5d5bc468032'") .collectAsList(); - assertEquals(3, mergedIn.size()); + assertEquals(4, mergedIn.size()); mergedIn.forEach(r -> { assertEquals(ModelConstants.RESULT_RESULT, r.getRelType()); assertEquals(ModelConstants.DEDUP, r.getSubRelType()); @@ -473,8 +474,8 @@ public class SparkDedupTest implements Serializable { System.out.println("orp_mergerel = " + orp_mergerel); if (CHECK_CARDINALITIES) { - assertEquals(1268, orgs_mergerel); - assertEquals(1156, pubs.count()); + assertEquals(1280, orgs_mergerel); + assertEquals(1158, pubs.count()); assertEquals(292, sw_mergerel); assertEquals(476, ds_mergerel); assertEquals(742, orp_mergerel); @@ -561,7 +562,7 @@ public class SparkDedupTest implements Serializable { System.out.println("orp_mergerel = " + orp_mergerel); if (CHECK_CARDINALITIES) { - assertEquals(1278, orgs_mergerel); + assertEquals(1280, orgs_mergerel); assertEquals(1156, pubs.count()); assertEquals(292, sw_mergerel); assertEquals(476, ds_mergerel); @@ -618,7 +619,7 @@ public class SparkDedupTest implements Serializable { System.out.println("orp_deduprecord = " + orp_deduprecord); if (CHECK_CARDINALITIES) { - assertEquals(78, orgs_deduprecord); + assertEquals(87, orgs_deduprecord); assertEquals(96, pubs.count()); assertEquals(47, sw_deduprecord); assertEquals(97, ds_deduprecord); @@ -761,7 +762,7 @@ public class SparkDedupTest implements Serializable { if (CHECK_CARDINALITIES) { assertEquals(930, publications); - assertEquals(831, organizations); + assertEquals(840, organizations); assertEquals(100, projects); assertEquals(100, datasource); assertEquals(196, softwares); diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkOpenorgsDedupTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkOpenorgsDedupTest.java index b2b5d824b..6b3ef8a20 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkOpenorgsDedupTest.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkOpenorgsDedupTest.java @@ -146,7 
+146,7 @@ public class SparkOpenorgsDedupTest implements Serializable { .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "organization")) .count(); - assertEquals(92, orgs_simrel); + assertEquals(91, orgs_simrel); } @Test @@ -175,7 +175,7 @@ public class SparkOpenorgsDedupTest implements Serializable { .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "organization")) .count(); - assertEquals(128, orgs_simrel); + assertEquals(127, orgs_simrel); } @Test diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkPublicationRootsTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkPublicationRootsTest.java index 9d73475be..2efa26c02 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkPublicationRootsTest.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkPublicationRootsTest.java @@ -324,7 +324,7 @@ public class SparkPublicationRootsTest implements Serializable { private void verifyRoot_case_3(Dataset roots, Dataset pubs) { Publication root = roots - .filter("id = '50|dedup_wf_001::31ca734cc22181b704c4aa8fd050062a'") + .filter("id = '50|dedup_wf_002::7143f4ff5708f3657db0b7e68ea74d55'") .first(); assertNotNull(root); diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkPublicationRootsTest2.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkPublicationRootsTest2.java index 9afe1e34b..1e73801ff 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkPublicationRootsTest2.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkPublicationRootsTest2.java @@ -143,7 +143,9 @@ public class SparkPublicationRootsTest2 implements Serializable { "--graphBasePath", graphInputPath, "--actionSetId", testActionSetId, "--isLookUpUrl", "lookupurl", - "--workingPath", workingPath + "--workingPath", workingPath, + "--hiveMetastoreUris", "", + }), spark) .run(isLookUpService); @@ -153,7 +155,7 @@ public class SparkPublicationRootsTest2 implements Serializable { .as(Encoders.bean(Relation.class)); assertEquals( - 3, merges + 4, merges .filter("relclass == 'isMergedIn'") .map((MapFunction) Relation::getTarget, Encoders.STRING()) .distinct() @@ -178,7 +180,7 @@ public class SparkPublicationRootsTest2 implements Serializable { .textFile(workingPath + "/" + testActionSetId + "/publication_deduprecord") .map(asEntity(Publication.class), Encoders.bean(Publication.class)); - assertEquals(3, roots.count()); + assertEquals(4, roots.count()); final Dataset pubs = spark .read() @@ -195,7 +197,6 @@ public class SparkPublicationRootsTest2 implements Serializable { .collectAsList() .get(0); - assertEquals(crossref_duplicate.getDateofacceptance().getValue(), root.getDateofacceptance().getValue()); assertEquals(crossref_duplicate.getJournal().getName(), root.getJournal().getName()); assertEquals(crossref_duplicate.getJournal().getIssnPrinted(), root.getJournal().getIssnPrinted()); assertEquals(crossref_duplicate.getPublisher().getValue(), root.getPublisher().getValue()); diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkStatsTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkStatsTest.java index 19f2c8102..d21a0dcf4 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkStatsTest.java +++ 
b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkStatsTest.java @@ -168,7 +168,7 @@ public class SparkStatsTest implements Serializable { .load(testOutputBasePath + "/" + testActionSetId + "/otherresearchproduct_blockstats") .count(); - assertEquals(414, orgs_blocks); + assertEquals(406, orgs_blocks); assertEquals(221, pubs_blocks); assertEquals(134, sw_blocks); assertEquals(196, ds_blocks); diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/jpath/JsonPathTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/jpath/JsonPathTest.java index 18c9ce18d..0923f67fa 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/jpath/JsonPathTest.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/jpath/JsonPathTest.java @@ -19,17 +19,15 @@ class JsonPathTest { void testJPath() throws IOException { DedupConfig conf = DedupConfig - .load(IOUtils.toString(getClass().getResourceAsStream("dedup_conf_organization.json"))); + .load(IOUtils.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/dedup/jpath/dedup_conf_organization.json"))); - final String org = IOUtils.toString(getClass().getResourceAsStream("organization.json")); + final String org = IOUtils.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/dedup/jpath/organization.json")); Row row = SparkModel.apply(conf).rowFromJson(org); System.out.println("row = " + row); Assertions.assertNotNull(row); Assertions.assertTrue(StringUtils.isNotBlank(row.getAs("identifier"))); - - System.out.println("row = " + row.getAs("countrytitle")); } @Test diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/conf/org.curr.conf.json b/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/conf/org.curr.conf.json index f00f6198e..388f0b6ff 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/conf/org.curr.conf.json +++ b/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/conf/org.curr.conf.json @@ -24,22 +24,19 @@ "start": { "fields": [ { - "field": "gridid", - "comparator": "exactMatch", + "field": "pid", + "comparator": "jsonListMatch", "weight": 1, "countIfUndefined": "false", - "params": {} - }, - { - "field": "rorid", - "comparator": "exactMatch", - "weight": 1, - "countIfUndefined": "false", - "params": {} + "params": { + "jpath_classid": "$.qualifier.classid", + "jpath_value": "$.value", + "mode": "type" + } } ], "threshold": 1, - "aggregation": "OR", + "aggregation": "MAX", "positive": "MATCH", "negative": "NO_MATCH", "undefined": "necessaryConditions", @@ -149,11 +146,10 @@ "model" : [ { "name" : "country", "type" : "String", "path" : "$.country.classid", "infer" : "country", "inferenceFrom" : "$.legalname.value"}, { "name" : "legalshortname", "type" : "String", "path" : "$.legalshortname.value", "infer" : "city_keyword"}, - { "name" : "original_legalname", "type" : "String", "path" : "$.legalname.value" }, + { "name" : "original_legalname", "type" : "String", "path" : "$.legalname.value", "clean": "title"}, { "name" : "legalname", "type" : "String", "path" : "$.legalname.value", "infer" : "city_keyword"}, { "name" : "websiteurl", "type" : "URL", "path" : "$.websiteurl.value" }, - { "name" : "gridid", "type" : "String", "path" : "$.pid[?(@.qualifier.classid =='grid')].value"}, - { "name" : "rorid", "type" : "String", "path" : "$.pid[?(@.qualifier.classid =='ROR')].value"}, + { "name": "pid", "type": 
"JSON", "path": "$.pid[*]", "overrideMatch": "true"}, { "name" : "originalId", "type" : "String", "path" : "$.id" } ], "blacklists" : {}, From 1b1fb9f1c25a66ea838182f0063a7c1d03a1f6db Mon Sep 17 00:00:00 2001 From: Miriam Baglioni Date: Wed, 11 Dec 2024 16:38:02 +0100 Subject: [PATCH 57/73] [research_fi] added plugin name to collectorplugins --- .../java/eu/dnetlib/dhp/collection/plugin/CollectorPlugin.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/CollectorPlugin.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/CollectorPlugin.java index 93e65b6a7..ef0ff00c4 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/CollectorPlugin.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/CollectorPlugin.java @@ -11,7 +11,7 @@ public interface CollectorPlugin { enum NAME { - oai, other, rest_json2xml, file, fileGzip, baseDump, gtr2Publications, osfPreprints, zenodoDump; + oai, other, rest_json2xml, file, fileGzip, baseDump, gtr2Publications, osfPreprints, zenodoDump, research_fi; public enum OTHER_NAME { mdstore_mongodb_dump, mdstore_mongodb From b039952d97249b8abe04c0394cf48f83c249bd02 Mon Sep 17 00:00:00 2001 From: "sandro.labruzzo" Date: Fri, 13 Dec 2024 10:43:27 +0100 Subject: [PATCH 58/73] bug fixed on zenodo plugin --- .../CollectZenodoDumpCollectorPlugin.java | 36 +++++++++++++------ .../zenodo/ZenodoPluginCollectionTest.java | 10 ++---- 2 files changed, 29 insertions(+), 17 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/zenodo/CollectZenodoDumpCollectorPlugin.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/zenodo/CollectZenodoDumpCollectorPlugin.java index 3ea29a9b0..73e6aca3e 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/zenodo/CollectZenodoDumpCollectorPlugin.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/zenodo/CollectZenodoDumpCollectorPlugin.java @@ -51,6 +51,9 @@ public class CollectZenodoDumpCollectorPlugin implements CollectorPlugin { log.info("Response code is {}", responseCode); if (responseCode >= 200 && responseCode < 400) { IOUtils.copy(response.getEntity().getContent(), fsDataOutputStream); + fsDataOutputStream.flush(); + fsDataOutputStream.hflush(); + fsDataOutputStream.close(); } } catch (Throwable eu) { throw new RuntimeException(eu); @@ -60,16 +63,30 @@ public class CollectZenodoDumpCollectorPlugin implements CollectorPlugin { } } + public FileSystem initializeFileSystem(final String hdfsURI) { + try { + return FileSystem.get(getHadoopConfiguration(hdfsURI)); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + @Override public Stream collect(ApiDescriptor api, AggregatorReport report) throws CollectorException { - try { - final String zenodoURL = api.getBaseUrl(); - final String hdfsURI = api.getParams().get("hdfsURI"); - final FileSystem fileSystem = FileSystem.get(getHadoopConfiguration(hdfsURI)); - downloadItem("zenodoDump.tar.gz", zenodoURL, "/tmp", fileSystem); - CompressionCodecFactory factory = new CompressionCodecFactory(fileSystem.getConf()); - Path sourcePath = new Path("/tmp/zenodoDump.tar.gz"); + final String zenodoURL = api.getBaseUrl(); + final String hdfsURI = api.getParams().get("hdfsURI"); + final FileSystem fileSystem = initializeFileSystem(hdfsURI); + return 
doStream(fileSystem, zenodoURL, "/tmp"); + } + + + public Stream doStream(FileSystem fileSystem, String zenodoURL, String basePath) throws CollectorException { + try { + + downloadItem("zenodoDump.tar.gz", zenodoURL, basePath, fileSystem); + CompressionCodecFactory factory = new CompressionCodecFactory(fileSystem.getConf()); + Path sourcePath = new Path(basePath+"/zenodoDump.tar.gz"); CompressionCodec codec = factory.getCodec(sourcePath); InputStream gzipInputStream = null; try { @@ -78,15 +95,14 @@ public class CollectZenodoDumpCollectorPlugin implements CollectorPlugin { } catch (IOException e) { throw new CollectorException(e); - } finally { - log.info("Closing gzip stream"); - org.apache.hadoop.io.IOUtils.closeStream(gzipInputStream); } } catch (Exception e) { throw new CollectorException(e); } } + + private Stream iterateTar(InputStream gzipInputStream) throws Exception { Iterable iterable = () -> new ZenodoTarIterator(gzipInputStream); diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/zenodo/ZenodoPluginCollectionTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/zenodo/ZenodoPluginCollectionTest.java index 9b5cf1850..e0112914b 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/zenodo/ZenodoPluginCollectionTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/zenodo/ZenodoPluginCollectionTest.java @@ -1,20 +1,15 @@ package eu.dnetlib.dhp.collection.plugin.zenodo; -import static org.junit.jupiter.api.Assertions.assertNotNull; - import java.util.zip.GZIPInputStream; - import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; -import com.fasterxml.jackson.databind.ObjectMapper; - -import eu.dnetlib.dhp.collection.ApiDescriptor; -import eu.dnetlib.dhp.common.collection.CollectorException; public class ZenodoPluginCollectionTest { + + @Test public void testZenodoIterator() throws Exception { @@ -32,4 +27,5 @@ public class ZenodoPluginCollectionTest { } } + } From dccbcfd36c1d4a14cd473775f4c56cf7f649db8a Mon Sep 17 00:00:00 2001 From: "sandro.labruzzo" Date: Fri, 13 Dec 2024 11:48:32 +0100 Subject: [PATCH 59/73] code formatted --- .../eu/dnetlib/dhp/collection/orcid/ORCIDExtractor.java | 2 +- .../plugin/zenodo/CollectZenodoDumpCollectorPlugin.java | 7 ++----- .../plugin/zenodo/ZenodoPluginCollectionTest.java | 5 +---- 3 files changed, 4 insertions(+), 10 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/orcid/ORCIDExtractor.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/orcid/ORCIDExtractor.java index 1adad104e..8172456bb 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/orcid/ORCIDExtractor.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/orcid/ORCIDExtractor.java @@ -160,7 +160,7 @@ public class ORCIDExtractor extends Thread { } } finally { for (SequenceFile.Writer k : fileMap.values()) { - log.info("Thread {}: Completed processed {} items", id, extractedItem); + log.info("Thread {}: Completed processed {} items", id, extractedItem); k.hflush(); k.close(); } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/zenodo/CollectZenodoDumpCollectorPlugin.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/zenodo/CollectZenodoDumpCollectorPlugin.java index 73e6aca3e..9513cab51 100644 --- 
a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/zenodo/CollectZenodoDumpCollectorPlugin.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/zenodo/CollectZenodoDumpCollectorPlugin.java @@ -80,13 +80,12 @@ public class CollectZenodoDumpCollectorPlugin implements CollectorPlugin { return doStream(fileSystem, zenodoURL, "/tmp"); } - - public Stream doStream(FileSystem fileSystem, String zenodoURL, String basePath) throws CollectorException { + public Stream doStream(FileSystem fileSystem, String zenodoURL, String basePath) throws CollectorException { try { downloadItem("zenodoDump.tar.gz", zenodoURL, basePath, fileSystem); CompressionCodecFactory factory = new CompressionCodecFactory(fileSystem.getConf()); - Path sourcePath = new Path(basePath+"/zenodoDump.tar.gz"); + Path sourcePath = new Path(basePath + "/zenodoDump.tar.gz"); CompressionCodec codec = factory.getCodec(sourcePath); InputStream gzipInputStream = null; try { @@ -101,8 +100,6 @@ public class CollectZenodoDumpCollectorPlugin implements CollectorPlugin { } } - - private Stream iterateTar(InputStream gzipInputStream) throws Exception { Iterable iterable = () -> new ZenodoTarIterator(gzipInputStream); diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/zenodo/ZenodoPluginCollectionTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/zenodo/ZenodoPluginCollectionTest.java index e0112914b..3592ad6a5 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/zenodo/ZenodoPluginCollectionTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/zenodo/ZenodoPluginCollectionTest.java @@ -2,14 +2,12 @@ package eu.dnetlib.dhp.collection.plugin.zenodo; import java.util.zip.GZIPInputStream; + import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; - public class ZenodoPluginCollectionTest { - - @Test public void testZenodoIterator() throws Exception { @@ -27,5 +25,4 @@ public class ZenodoPluginCollectionTest { } } - } From 36ca0b123e1b2fbf6ea8fdd7cf8989357dde85ef Mon Sep 17 00:00:00 2001 From: Giambattista Bloisi Date: Wed, 30 Oct 2024 15:23:05 +0100 Subject: [PATCH 60/73] Move AuthorMatchers in dhp-common --- .../dnetlib/dhp/utils}/AuthorMatchers.scala | 21 +++++----- .../dhp/utils}/ORCIDAuthorEnricher.scala | 39 +++++++++++++++---- .../dhp/utils/AuthorMatchersTest.scala | 6 +-- .../eu/dnetlib/pace/tree/AuthorsMatch.java | 2 +- .../SparkEnrichGraphWithOrcidAuthors.scala | 29 +------------- 5 files changed, 47 insertions(+), 50 deletions(-) rename {dhp-pace-core/src/main/java/eu/dnetlib/pace/util => dhp-common/src/main/scala/eu/dnetlib/dhp/utils}/AuthorMatchers.scala (81%) rename {dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/enrich/orcid => dhp-common/src/main/scala/eu/dnetlib/dhp/utils}/ORCIDAuthorEnricher.scala (78%) rename dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/enrich/orcid/ORCIDAuthorMatchersTest.scala => dhp-common/src/test/java/eu/dnetlib/dhp/utils/AuthorMatchersTest.scala (91%) diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/util/AuthorMatchers.scala b/dhp-common/src/main/scala/eu/dnetlib/dhp/utils/AuthorMatchers.scala similarity index 81% rename from dhp-pace-core/src/main/java/eu/dnetlib/pace/util/AuthorMatchers.scala rename to dhp-common/src/main/scala/eu/dnetlib/dhp/utils/AuthorMatchers.scala index 116f515ed..5f842726f 100644 --- 
a/dhp-pace-core/src/main/java/eu/dnetlib/pace/util/AuthorMatchers.scala +++ b/dhp-common/src/main/scala/eu/dnetlib/dhp/utils/AuthorMatchers.scala @@ -1,4 +1,4 @@ -package eu.dnetlib.pace.util +package eu.dnetlib.dhp.utils import java.util.Locale import java.util.regex.Pattern @@ -66,19 +66,18 @@ object AuthorMatchers { } def removeMatches( - graph_authors: java.util.List[String], - orcid_authors: java.util.List[String], - matchingFunc: java.util.function.BiFunction[String,String,Boolean] - ) : java.util.List[String] = { - removeMatches(graph_authors, orcid_authors, (a, b) => matchingFunc(a,b)) + graph_authors: java.util.List[String], + orcid_authors: java.util.List[String], + matchingFunc: java.util.function.BiFunction[String, String, Boolean] + ): java.util.List[String] = { + removeMatches(graph_authors, orcid_authors, (a, b) => matchingFunc(a, b)) } - def removeMatches( - graph_authors: java.util.List[String], - orcid_authors: java.util.List[String], - matchingFunc: (String, String) => Boolean - ) : java.util.List[String] = { + graph_authors: java.util.List[String], + orcid_authors: java.util.List[String], + matchingFunc: (String, String) => Boolean + ): java.util.List[String] = { val matched = new java.util.ArrayList[String]() if (graph_authors != null && !graph_authors.isEmpty) { diff --git a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/enrich/orcid/ORCIDAuthorEnricher.scala b/dhp-common/src/main/scala/eu/dnetlib/dhp/utils/ORCIDAuthorEnricher.scala similarity index 78% rename from dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/enrich/orcid/ORCIDAuthorEnricher.scala rename to dhp-common/src/main/scala/eu/dnetlib/dhp/utils/ORCIDAuthorEnricher.scala index 2e23a3a59..6a087b1de 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/enrich/orcid/ORCIDAuthorEnricher.scala +++ b/dhp-common/src/main/scala/eu/dnetlib/dhp/utils/ORCIDAuthorEnricher.scala @@ -1,21 +1,44 @@ -package eu.dnetlib.dhp.enrich.orcid +package eu.dnetlib.dhp.utils import eu.dnetlib.dhp.schema.common.ModelConstants import eu.dnetlib.dhp.schema.oaf.{Author, StructuredProperty} import eu.dnetlib.dhp.schema.sx.OafUtils -import eu.dnetlib.pace.util.AuthorMatchers import java.util import scala.beans.BeanProperty import scala.collection.JavaConverters._ import scala.util.control.Breaks.{break, breakable} +case class OrcidAuthor( + @BeanProperty var orcid: String, + @BeanProperty var familyName: String, + @BeanProperty var givenName: String, + @BeanProperty var creditName: String, + @BeanProperty var otherNames: java.util.List[String] +) { + def this() = this("null", "null", "null", "null", null) +} + +case class MatchedAuthors( + @BeanProperty var author: Author, + @BeanProperty var orcid: OrcidAuthor, + @BeanProperty var `type`: String +) + +case class MatchData( + @BeanProperty var id: String, + @BeanProperty var graph_authors: java.util.List[Author], + @BeanProperty var orcid_authors: java.util.List[OrcidAuthor] +) { + def this() = this("null", null, null) +} + case class ORCIDAuthorEnricherResult( @BeanProperty var id: String, @BeanProperty var enriched_author: java.util.List[Author], @BeanProperty var author_matched: java.util.List[MatchedAuthors], @BeanProperty var author_unmatched: java.util.List[Author], - @BeanProperty var orcid_unmatched: java.util.List[OrcidAutor] + @BeanProperty var orcid_unmatched: java.util.List[OrcidAuthor] ) object ORCIDAuthorEnricher extends Serializable { @@ -23,7 +46,7 @@ object ORCIDAuthorEnricher extends Serializable { def enrichOrcid( id: 
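// The case classes above declare @BeanProperty accessors and an auxiliary
// no-arg constructor so that Spark can materialize them through
// Encoders.bean when the intermediate match data is read back from parquet.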
String, graph_authors: java.util.List[Author], - orcid_authors: java.util.List[OrcidAutor] + orcid_authors: java.util.List[OrcidAuthor] ): ORCIDAuthorEnricherResult = { // Author enriching strategy: // 1) create a copy of graph author list in unmatched_authors @@ -81,10 +104,10 @@ object ORCIDAuthorEnricher extends Serializable { } private def extractAndEnrichMatches( - graph_authors: java.util.List[Author], - orcid_authors: java.util.List[OrcidAutor], - matchingFunc: (Author, OrcidAutor) => Boolean, - matchName: String + graph_authors: java.util.List[Author], + orcid_authors: java.util.List[OrcidAuthor], + matchingFunc: (Author, OrcidAuthor) => Boolean, + matchName: String ) = { val matched = scala.collection.mutable.ArrayBuffer.empty[MatchedAuthors] diff --git a/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/enrich/orcid/ORCIDAuthorMatchersTest.scala b/dhp-common/src/test/java/eu/dnetlib/dhp/utils/AuthorMatchersTest.scala similarity index 91% rename from dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/enrich/orcid/ORCIDAuthorMatchersTest.scala rename to dhp-common/src/test/java/eu/dnetlib/dhp/utils/AuthorMatchersTest.scala index eece56b74..fc9dcd1da 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/enrich/orcid/ORCIDAuthorMatchersTest.scala +++ b/dhp-common/src/test/java/eu/dnetlib/dhp/utils/AuthorMatchersTest.scala @@ -1,10 +1,10 @@ -package eu.dnetlib.dhp.enrich.orcid +package eu.dnetlib.dhp.utils -import eu.dnetlib.pace.util.AuthorMatchers.matchOrderedTokenAndAbbreviations +import eu.dnetlib.dhp.utils.AuthorMatchers.matchOrderedTokenAndAbbreviations import org.junit.jupiter.api.Assertions.{assertFalse, assertTrue} import org.junit.jupiter.api.Test -class ORCIDAuthorMatchersTest { +class AuthorMatchersTest { @Test def testShortNames(): Unit = { assertTrue(matchOrderedTokenAndAbbreviations("Lasagni Mariozzi Federico", "Lasagni F. 
Mariozzi")) diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/AuthorsMatch.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/AuthorsMatch.java index 07080b09e..dcf9241ed 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/AuthorsMatch.java +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/AuthorsMatch.java @@ -9,11 +9,11 @@ import java.util.stream.Collectors; import com.wcohen.ss.AbstractStringDistance; +import eu.dnetlib.dhp.utils.AuthorMatchers; import eu.dnetlib.pace.config.Config; import eu.dnetlib.pace.model.Person; import eu.dnetlib.pace.tree.support.AbstractListComparator; import eu.dnetlib.pace.tree.support.ComparatorClass; -import eu.dnetlib.pace.util.AuthorMatchers; @ComparatorClass("authorsMatch") public class AuthorsMatch extends AbstractListComparator { diff --git a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/enrich/orcid/SparkEnrichGraphWithOrcidAuthors.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/enrich/orcid/SparkEnrichGraphWithOrcidAuthors.scala index 847a5f090..e615f02d8 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/enrich/orcid/SparkEnrichGraphWithOrcidAuthors.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/enrich/orcid/SparkEnrichGraphWithOrcidAuthors.scala @@ -2,38 +2,13 @@ package eu.dnetlib.dhp.enrich.orcid import eu.dnetlib.dhp.application.AbstractScalaApplication import eu.dnetlib.dhp.schema.common.ModelSupport -import eu.dnetlib.dhp.schema.oaf._ +import eu.dnetlib.dhp.utils.{MatchData, ORCIDAuthorEnricher, ORCIDAuthorEnricherResult} import org.apache.spark.sql._ import org.apache.spark.sql.functions._ import org.slf4j.{Logger, LoggerFactory} -import scala.beans.BeanProperty import scala.collection.JavaConverters._ -case class OrcidAutor( - @BeanProperty var orcid: String, - @BeanProperty var familyName: String, - @BeanProperty var givenName: String, - @BeanProperty var creditName: String, - @BeanProperty var otherNames: java.util.List[String] -) { - def this() = this("null", "null", "null", "null", null) -} - -case class MatchData( - @BeanProperty var id: String, - @BeanProperty var graph_authors: java.util.List[Author], - @BeanProperty var orcid_authors: java.util.List[OrcidAutor] -) { - def this() = this("null", null, null) -} - -case class MatchedAuthors( - @BeanProperty var author: Author, - @BeanProperty var orcid: OrcidAutor, - @BeanProperty var `type`: String -) - class SparkEnrichGraphWithOrcidAuthors(propertyPath: String, args: Array[String], log: Logger) extends AbstractScalaApplication(propertyPath, args, log: Logger) { @@ -87,7 +62,7 @@ class SparkEnrichGraphWithOrcidAuthors(propertyPath: String, args: Array[String] } - private def createTemporaryData(graphPath: String, orcidPath: String, targetPath: String): Unit = { + def createTemporaryData(graphPath: String, orcidPath: String, targetPath: String): Unit = { val orcidAuthors = spark.read.load(s"$orcidPath/Authors").select("orcid", "familyName", "givenName", "creditName", "otherNames") From 43a9fe1ef49422afa975f3e97700ca0115b717fa Mon Sep 17 00:00:00 2001 From: Giambattista Bloisi Date: Wed, 30 Oct 2024 15:23:12 +0100 Subject: [PATCH 61/73] Draft SparkPropagateOrcidAuthors --- .../orcid/SparkPropagateOrcidAuthors.scala | 63 +++++++++++++++++++ 1 file changed, 63 insertions(+) create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/enrich/orcid/SparkPropagateOrcidAuthors.scala diff --git 
a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/enrich/orcid/SparkPropagateOrcidAuthors.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/enrich/orcid/SparkPropagateOrcidAuthors.scala new file mode 100644 index 000000000..2511830a2 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/enrich/orcid/SparkPropagateOrcidAuthors.scala @@ -0,0 +1,63 @@ +package eu.dnetlib.dhp.enrich.orcid + +import eu.dnetlib.dhp.schema.common.ModelSupport +import eu.dnetlib.dhp.schema.oaf.{Relation, Result} +import eu.dnetlib.dhp.utils.OrcidAuthor +import org.apache.spark.sql._ +import org.apache.spark.sql.functions._ +import org.slf4j.{Logger, LoggerFactory} + +import scala.collection.JavaConverters._ + +class SparkPropagateOrcidAuthors(propertyPath: String, args: Array[String], log: Logger) + extends SparkEnrichGraphWithOrcidAuthors(propertyPath, args, log: Logger) { + + override def createTemporaryData(graphPath: String, orcidPath: String, targetPath: String): Unit = { + val relEnc = Encoders.bean(classOf[Relation]) + + ModelSupport.entityTypes.asScala + .filter(e => ModelSupport.isResult(e._1)) + .foreach(e => { + val resultType = e._1.name() + val enc = Encoders.bean(e._2) + + val orcidDnet = spark.read + .load("$graphPath/$resultType") + .as[Result] + .map( + result => + ( + result.getId, + result.getAuthor.asScala.map(a => OrcidAuthor("extract ORCID", a.getSurname, a.getName, a.getFullname, null)) + ) + ) + .where("size(_2) > 0") + .selectExpr("_1 as id", "_2 as orcid_authors") + + val result = + spark.read.schema(enc.schema).json(s"$graphPath/$resultType").selectExpr("id", "author as graph_authors") + + val supplements = spark.read.schema(relEnc.schema).json(s"$graphPath/relation").where("relclass IN('isSupplementedBy', 'isSupplementOf')").selectExpr("source as id", "target") + + result + .join(supplements, Seq("id")) + .join(orcidDnet, orcidDnet("id") === col("target")) + .drop("target") + .write + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .parquet(s"$targetPath/${resultType}_unmatched") + }) + } +} + +object SparkPropagateOrcidAuthors { + + val log: Logger = LoggerFactory.getLogger(SparkPropagateOrcidAuthors.getClass) + + def main(args: Array[String]): Unit = { + new SparkPropagateOrcidAuthors("/eu/dnetlib/dhp/enrich/orcid/enrich_graph_orcid_parameters.json", args, log) + .initialize() + .run() + } +} From 0cae08578648cc138da473c9825b372f86936177 Mon Sep 17 00:00:00 2001 From: Miriam Baglioni Date: Mon, 4 Nov 2024 12:41:32 +0100 Subject: [PATCH 62/73] [OrcidPropagation] new preparation step to use the authornamedisambiguation employed for orcid enrichment. 
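The preparation step keeps, for each result type, the results whose authors
already expose an orcid (or orcid_pending) pid, turns those authors into
OrcidAuthor records, and joins them to the results reachable through the
supplement relations (isSupplementTo/isSupplementedBy), writing one
<resultType>_unmatched parquet dataset per type for the author-name
disambiguation step. A minimal sketch of the pid extraction, assuming the
Author/StructuredProperty model from dhp-schemas (class and method names are
illustrative, not part of the patch):

    import java.util.Optional;

    import eu.dnetlib.dhp.schema.common.ModelConstants;
    import eu.dnetlib.dhp.schema.oaf.Author;
    import eu.dnetlib.dhp.utils.OrcidAuthor;

    class OrcidPidExtractor {

        // prefers a confirmed orcid pid and falls back to orcid_pending,
        // mirroring getOrcid() in SparkPrepareAuthorInfo
        static Optional<OrcidAuthor> toOrcidAuthor(Author a) {
            return a.getPid().stream()
                .filter(p -> ModelConstants.ORCID.equalsIgnoreCase(p.getQualifier().getClassid()))
                .findFirst()
                .map(Optional::of)
                .orElseGet(() -> a.getPid().stream()
                    .filter(p -> ModelConstants.ORCID_PENDING.equalsIgnoreCase(p.getQualifier().getClassid()))
                    .findFirst())
                .map(p -> new OrcidAuthor(p.getValue(), a.getSurname(), a.getName(), a.getFullname(), null));
        }
    }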
--- .../AutoritativeAuthor.java | 43 ---- .../orcidtoresultfromsemrel/OrcidAuthors.java | 18 ++ .../PrepareResultOrcidAssociationStep1.java | 124 ---------- .../PrepareResultOrcidAssociationStep2.java | 95 -------- .../ResultOrcidList.java | 27 --- .../SparkOrcidToResultFromSemRelJob.java | 211 ------------------ .../SparkPrepareAuthorInfo.java | 164 ++++++++++++++ 7 files changed, 182 insertions(+), 500 deletions(-) delete mode 100644 dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/AutoritativeAuthor.java create mode 100644 dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/OrcidAuthors.java delete mode 100644 dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep1.java delete mode 100644 dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep2.java delete mode 100644 dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/ResultOrcidList.java delete mode 100644 dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java create mode 100644 dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkPrepareAuthorInfo.java diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/AutoritativeAuthor.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/AutoritativeAuthor.java deleted file mode 100644 index a5fcab360..000000000 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/AutoritativeAuthor.java +++ /dev/null @@ -1,43 +0,0 @@ - -package eu.dnetlib.dhp.orcidtoresultfromsemrel; - -public class AutoritativeAuthor { - - private String name; - private String surname; - private String fullname; - private String orcid; - - public String getName() { - return name; - } - - public void setName(String name) { - this.name = name; - } - - public String getSurname() { - return surname; - } - - public void setSurname(String surname) { - this.surname = surname; - } - - public String getFullname() { - return fullname; - } - - public void setFullname(String fullname) { - this.fullname = fullname; - } - - public String getOrcid() { - return orcid; - } - - public void setOrcid(String orcid) { - this.orcid = orcid; - } - -} diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/OrcidAuthors.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/OrcidAuthors.java new file mode 100644 index 000000000..0c687b991 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/OrcidAuthors.java @@ -0,0 +1,18 @@ +package eu.dnetlib.dhp.orcidtoresultfromsemrel; + +import eu.dnetlib.dhp.utils.OrcidAuthor; + +import java.io.Serializable; +import java.util.List; + +public class OrcidAuthors implements Serializable { + List orcidAuthorList; + + public List getOrcidAuthorList() { + return orcidAuthorList; + } + + public void setOrcidAuthorList(List orcidAuthorList) { + this.orcidAuthorList = orcidAuthorList; + } +} diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep1.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep1.java deleted file mode 100644 index 
bc72a2ae1..000000000 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep1.java +++ /dev/null @@ -1,124 +0,0 @@ - -package eu.dnetlib.dhp.orcidtoresultfromsemrel; - -import static eu.dnetlib.dhp.PropagationConstant.*; -import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; - -import java.util.Arrays; -import java.util.List; - -import org.apache.commons.io.IOUtils; -import org.apache.spark.SparkConf; -import org.apache.spark.sql.Dataset; -import org.apache.spark.sql.Encoders; -import org.apache.spark.sql.SaveMode; -import org.apache.spark.sql.SparkSession; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.google.gson.Gson; - -import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.oaf.Relation; -import eu.dnetlib.dhp.schema.oaf.Result; - -public class PrepareResultOrcidAssociationStep1 { - private static final Logger log = LoggerFactory.getLogger(PrepareResultOrcidAssociationStep1.class); - - public static void main(String[] args) throws Exception { - String jsonConf = IOUtils - .toString( - PrepareResultOrcidAssociationStep1.class - .getResourceAsStream( - "/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/input_prepareorcidtoresult_parameters.json")); - - final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConf); - parser.parseArgument(args); - - Boolean isSparkSessionManaged = isSparkSessionManaged(parser); - log.info("isSparkSessionManaged: {}", isSparkSessionManaged); - - String inputPath = parser.get("sourcePath"); - log.info("inputPath: {}", inputPath); - - final String outputPath = parser.get("outputPath"); - log.info("outputPath: {}", outputPath); - - final String resultClassName = parser.get("resultTableName"); - log.info("resultTableName: {}", resultClassName); - - final List allowedsemrel = Arrays.asList(parser.get("allowedsemrels").split(";")); - log.info("allowedSemRel: {}", new Gson().toJson(allowedsemrel)); - - final String resultType = resultClassName.substring(resultClassName.lastIndexOf(".") + 1).toLowerCase(); - log.info("resultType: {}", resultType); - - Class resultClazz = (Class) Class.forName(resultClassName); - - SparkConf conf = new SparkConf(); - conf.set("hive.metastore.uris", parser.get("hive_metastore_uris")); - - String inputRelationPath = inputPath + "/relation"; - log.info("inputRelationPath: {}", inputRelationPath); - - String inputResultPath = inputPath + "/" + resultType; - log.info("inputResultPath: {}", inputResultPath); - - String outputResultPath = outputPath + "/" + resultType; - log.info("outputResultPath: {}", outputResultPath); - - runWithSparkHiveSession( - conf, - isSparkSessionManaged, - spark -> { - removeOutputDir(spark, outputPath); - prepareInfo( - spark, inputRelationPath, inputResultPath, outputResultPath, resultClazz, allowedsemrel); - }); - } - - private static void prepareInfo( - SparkSession spark, - String inputRelationPath, - String inputResultPath, - String outputResultPath, - Class resultClazz, - List allowedsemrel) { - - Dataset relation = readPath(spark, inputRelationPath, Relation.class); - relation.createOrReplaceTempView("relation"); - - log.info("Reading Graph table from: {}", inputResultPath); - Dataset result = readPath(spark, inputResultPath, resultClazz); - result.createOrReplaceTempView("result"); - - String query = "SELECT target resultId, author authorList" - + " FROM (SELECT id, 
collect_set(named_struct('name', name, 'surname', surname, 'fullname', fullname, 'orcid', orcid)) author " - + " FROM ( " - + " SELECT DISTINCT id, MyT.fullname, MyT.name, MyT.surname, MyP.value orcid " - + " FROM result " - + " LATERAL VIEW EXPLODE (author) a AS MyT " - + " LATERAL VIEW EXPLODE (MyT.pid) p AS MyP " - + " WHERE lower(MyP.qualifier.classid) = '" + ModelConstants.ORCID + "' or " - + " lower(MyP.qualifier.classid) = '" + ModelConstants.ORCID_PENDING + "') tmp " - + " GROUP BY id) r_t " - + " JOIN (" - + " SELECT source, target " - + " FROM relation " - + " WHERE datainfo.deletedbyinference = false " - + getConstraintList(" lower(relclass) = '", allowedsemrel) - + " ) rel_rel " - + " ON source = id"; - - log.info("executedQuery: {}", query); - spark - .sql(query) - .as(Encoders.bean(ResultOrcidList.class)) - .write() - .option("compression", "gzip") - .mode(SaveMode.Overwrite) - .json(outputResultPath); - } - -} diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep2.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep2.java deleted file mode 100644 index 46894d0e1..000000000 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep2.java +++ /dev/null @@ -1,95 +0,0 @@ - -package eu.dnetlib.dhp.orcidtoresultfromsemrel; - -import static eu.dnetlib.dhp.PropagationConstant.*; -import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; - -import java.util.HashSet; -import java.util.Set; - -import org.apache.commons.io.IOUtils; -import org.apache.hadoop.io.compress.GzipCodec; -import org.apache.spark.SparkConf; -import org.apache.spark.sql.*; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.fasterxml.jackson.databind.ObjectMapper; - -import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import scala.Tuple2; - -public class PrepareResultOrcidAssociationStep2 { - private static final Logger log = LoggerFactory.getLogger(PrepareResultOrcidAssociationStep2.class); - - private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - - public static void main(String[] args) throws Exception { - String jsonConfiguration = IOUtils - .toString( - PrepareResultOrcidAssociationStep2.class - .getResourceAsStream( - "/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/input_prepareorcidtoresult_parameters2.json")); - - final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); - - parser.parseArgument(args); - - Boolean isSparkSessionManaged = isSparkSessionManaged(parser); - log.info("isSparkSessionManaged: {}", isSparkSessionManaged); - - String inputPath = parser.get("sourcePath"); - log.info("inputPath: {}", inputPath); - - final String outputPath = parser.get("outputPath"); - log.info("outputPath: {}", outputPath); - - SparkConf conf = new SparkConf(); - - runWithSparkSession( - conf, - isSparkSessionManaged, - spark -> { - removeOutputDir(spark, outputPath); - mergeInfo(spark, inputPath, outputPath); - }); - } - - private static void mergeInfo(SparkSession spark, String inputPath, String outputPath) { - - Dataset resultOrcidAssoc = readPath(spark, inputPath + "/publication", ResultOrcidList.class) - .union(readPath(spark, inputPath + "/dataset", ResultOrcidList.class)) - .union(readPath(spark, inputPath + "/otherresearchproduct", ResultOrcidList.class)) - .union(readPath(spark, inputPath + 
"/software", ResultOrcidList.class)); - - resultOrcidAssoc - .toJavaRDD() - .mapToPair(r -> new Tuple2<>(r.getResultId(), r)) - .reduceByKey( - (a, b) -> { - if (a == null) { - return b; - } - if (b == null) { - return a; - } - Set orcid_set = new HashSet<>(); - a.getAuthorList().stream().forEach(aa -> orcid_set.add(aa.getOrcid())); - b - .getAuthorList() - .stream() - .forEach( - aa -> { - if (!orcid_set.contains(aa.getOrcid())) { - a.getAuthorList().add(aa); - orcid_set.add(aa.getOrcid()); - } - }); - return a; - }) - .map(Tuple2::_2) - .map(r -> OBJECT_MAPPER.writeValueAsString(r)) - .saveAsTextFile(outputPath, GzipCodec.class); - } - -} diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/ResultOrcidList.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/ResultOrcidList.java deleted file mode 100644 index 54b415d1c..000000000 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/ResultOrcidList.java +++ /dev/null @@ -1,27 +0,0 @@ - -package eu.dnetlib.dhp.orcidtoresultfromsemrel; - -import java.io.Serializable; -import java.util.ArrayList; -import java.util.List; - -public class ResultOrcidList implements Serializable { - String resultId; - List authorList = new ArrayList<>(); - - public String getResultId() { - return resultId; - } - - public void setResultId(String resultId) { - this.resultId = resultId; - } - - public List getAuthorList() { - return authorList; - } - - public void setAuthorList(List authorList) { - this.authorList = authorList; - } -} diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java deleted file mode 100644 index c5d632658..000000000 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java +++ /dev/null @@ -1,211 +0,0 @@ - -package eu.dnetlib.dhp.orcidtoresultfromsemrel; - -import static eu.dnetlib.dhp.PropagationConstant.*; -import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; - -import java.util.List; -import java.util.Optional; - -import org.apache.commons.io.IOUtils; -import org.apache.commons.lang3.StringUtils; -import org.apache.spark.SparkConf; -import org.apache.spark.api.java.function.MapFunction; -import org.apache.spark.sql.Dataset; -import org.apache.spark.sql.Encoders; -import org.apache.spark.sql.SaveMode; -import org.apache.spark.sql.SparkSession; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.google.common.collect.Lists; - -import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.common.PacePerson; -import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.oaf.Author; -import eu.dnetlib.dhp.schema.oaf.Result; -import eu.dnetlib.dhp.schema.oaf.StructuredProperty; -import scala.Tuple2; - -public class SparkOrcidToResultFromSemRelJob { - private static final Logger log = LoggerFactory.getLogger(SparkOrcidToResultFromSemRelJob.class); - - public static void main(String[] args) throws Exception { - String jsonConfiguration = IOUtils - .toString( - SparkOrcidToResultFromSemRelJob.class - .getResourceAsStream( - "/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/input_orcidtoresult_parameters.json")); - - final ArgumentApplicationParser parser = new 
ArgumentApplicationParser(jsonConfiguration); - parser.parseArgument(args); - - Boolean isSparkSessionManaged = isSparkSessionManaged(parser); - log.info("isSparkSessionManaged: {}", isSparkSessionManaged); - - final String inputPath = parser.get("sourcePath"); - log.info("inputPath: {}", inputPath); - - final String outputPath = parser.get("outputPath"); - log.info("outputPath: {}", outputPath); - - final String possibleUpdates = parser.get("possibleUpdatesPath"); - log.info("possibleUpdatesPath: {}", possibleUpdates); - - final String resultClassName = parser.get("resultTableName"); - log.info("resultTableName: {}", resultClassName); - - final Boolean saveGraph = Optional - .ofNullable(parser.get("saveGraph")) - .map(Boolean::valueOf) - .orElse(Boolean.TRUE); - log.info("saveGraph: {}", saveGraph); - - Class resultClazz = (Class) Class.forName(resultClassName); - - SparkConf conf = new SparkConf(); - - runWithSparkSession( - conf, - isSparkSessionManaged, - spark -> { - removeOutputDir(spark, outputPath); - if (saveGraph) { - execPropagation(spark, possibleUpdates, inputPath, outputPath, resultClazz); - } - }); - } - - private static void execPropagation( - SparkSession spark, - String possibleUpdatesPath, - String inputPath, - String outputPath, - Class resultClazz) { - - // read possible updates (resultId and list of possible orcid to add - Dataset possible_updates = readPath(spark, possibleUpdatesPath, ResultOrcidList.class); - // read the result we have been considering - Dataset result = readPath(spark, inputPath, resultClazz); - // make join result left_outer with possible updates - - result - .joinWith( - possible_updates, - result.col("id").equalTo(possible_updates.col("resultId")), - "left_outer") - .map(authorEnrichFn(), Encoders.bean(resultClazz)) - .write() - .mode(SaveMode.Overwrite) - .option("compression", "gzip") - .json(outputPath); - } - - private static MapFunction, R> authorEnrichFn() { - return value -> { - R ret = value._1(); - Optional rol = Optional.ofNullable(value._2()); - if (rol.isPresent() && Optional.ofNullable(ret.getAuthor()).isPresent()) { - List toenrich_author = ret.getAuthor(); - List autoritativeAuthors = rol.get().getAuthorList(); - for (Author author : toenrich_author) { - if (!containsAllowedPid(author)) { - enrichAuthor(author, autoritativeAuthors); - } - } - } - - return ret; - }; - } - - private static void enrichAuthor(Author a, List au) { - PacePerson pp = new PacePerson(a.getFullname(), false); - for (AutoritativeAuthor aa : au) { - if (enrichAuthor(aa, a, pp.getNormalisedFirstName(), pp.getNormalisedSurname())) { - return; - } - } - } - - private static boolean enrichAuthor(AutoritativeAuthor autoritative_author, Author author, - String author_name, - String author_surname) { - boolean toaddpid = false; - - if (StringUtils.isNotEmpty(autoritative_author.getSurname())) { - if (StringUtils.isNotEmpty(author.getSurname())) { - author_surname = author.getSurname(); - } - if (StringUtils.isNotEmpty(author_surname)) { - // have the same surname. Check the name - if (autoritative_author - .getSurname() - .trim() - .equalsIgnoreCase(author_surname.trim()) && StringUtils.isNotEmpty(autoritative_author.getName())) { - if (StringUtils.isNotEmpty(author.getName())) { - author_name = author.getName(); - } - if (StringUtils.isNotEmpty(author_name)) { - if (autoritative_author - .getName() - .trim() - .equalsIgnoreCase(author_name.trim())) { - toaddpid = true; - } - // they could be differently written (i.e. 
only the initials of the name - // in one of the two - else { - if (autoritative_author - .getName() - .trim() - .substring(0, 0) - .equalsIgnoreCase(author_name.trim().substring(0, 0))) { - toaddpid = true; - } - } - } - } - } - } - if (toaddpid) { - StructuredProperty p = new StructuredProperty(); - p.setValue(autoritative_author.getOrcid()); - p - .setQualifier( - getQualifier( - ModelConstants.ORCID_PENDING, ModelConstants.ORCID_CLASSNAME, ModelConstants.DNET_PID_TYPES)); - p - .setDataInfo( - getDataInfo( - PROPAGATION_DATA_INFO_TYPE, - PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_ID, - PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_NAME, - ModelConstants.DNET_PROVENANCE_ACTIONS)); - - Optional> authorPid = Optional.ofNullable(author.getPid()); - if (authorPid.isPresent()) { - authorPid.get().add(p); - } else { - author.setPid(Lists.newArrayList(p)); - } - - } - return toaddpid; - } - - private static boolean containsAllowedPid(Author a) { - Optional> pids = Optional.ofNullable(a.getPid()); - if (!pids.isPresent()) { - return false; - } - for (StructuredProperty pid : pids.get()) { - if (ModelConstants.ORCID_PENDING.equalsIgnoreCase(pid.getQualifier().getClassid()) || - ModelConstants.ORCID.equalsIgnoreCase(pid.getQualifier().getClassid())) { - return true; - } - } - return false; - } -} diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkPrepareAuthorInfo.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkPrepareAuthorInfo.java new file mode 100644 index 000000000..32337a25b --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkPrepareAuthorInfo.java @@ -0,0 +1,164 @@ +package eu.dnetlib.dhp.orcidtoresultfromsemrel; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.common.ModelSupport; +import eu.dnetlib.dhp.schema.oaf.Author; +import eu.dnetlib.dhp.schema.oaf.Relation; +import eu.dnetlib.dhp.schema.oaf.Result; +import eu.dnetlib.dhp.utils.OrcidAuthor; +import org.apache.commons.io.IOUtils; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.function.FilterFunction; +import org.apache.spark.api.java.function.MapFunction; +import org.apache.spark.sql.*; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import scala.Tuple2; + + +import java.util.List; +import java.util.Optional; +import java.util.stream.Collectors; + +import static eu.dnetlib.dhp.PropagationConstant.isSparkSessionManaged; +import static eu.dnetlib.dhp.PropagationConstant.removeOutputDir; +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; + +public class SparkPrepareAuthorInfo { + private static final Logger log = LoggerFactory.getLogger(SparkPrepareAuthorInfo.class); + + public static void main(String[] args) throws Exception { + String jsonConfiguration = IOUtils + .toString( + SparkPrepareAuthorInfo.class + .getResourceAsStream( + "/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/input_orcidtoresult_parameters.json")); + + final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); + parser.parseArgument(args); + + Boolean isSparkSessionManaged = isSparkSessionManaged(parser); + log.info("isSparkSessionManaged: {}", isSparkSessionManaged); + + final String inputPath = parser.get("sourcePath"); + log.info("inputPath: {}", inputPath); + + final String outputPath = parser.get("outputPath"); 
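// the actual work happens in createTemporaryData(...), invoked below inside
// runWithSparkSession: per result type it joins results, supplement
// relations and orcid-bearing authors, persisting the <type>_unmatched
// datasets consumed by the matching step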
+ log.info("outputPath: {}", outputPath); + + SparkConf conf = new SparkConf(); + + runWithSparkSession( + conf, + isSparkSessionManaged, + spark -> { + removeOutputDir(spark, outputPath); + createTemporaryData(spark, inputPath, outputPath); + + } + ); + } + + private static void createTemporaryData(SparkSession spark, String inputPath, String outputPath) { + ModelSupport.entityTypes + .keySet().stream().filter(ModelSupport::isResult) + .forEach(e -> { + Dataset orcidDnet = spark.read().schema(Encoders.bean(Result.class).schema()) + .json(inputPath + e.name()) + .as(Encoders.bean(Result.class)) + .filter((FilterFunction) r -> r.getAuthor().stream() + .anyMatch(a -> a.getPid() + .stream() + .anyMatch(p -> p.getQualifier().getClassid().equalsIgnoreCase(ModelConstants.ORCID) || + p.getQualifier().getClassid().equalsIgnoreCase(ModelConstants.ORCID_PENDING)))) + .map((MapFunction>) r -> + new Tuple2<>(r.getId(), getOrcidAuthorsList(r.getAuthor())) + , Encoders.tuple(Encoders.STRING(), Encoders.bean(OrcidAuthors.class))) + .selectExpr("_1 as id", "_2 as orcid_authors"); + + Dataset result = spark.read().schema(Encoders.bean(Result.class).schema()) + .json(inputPath + e.name()) + .as(Encoders.bean(Result.class)) + .selectExpr("id", "author as graph_authors"); + + Dataset supplements = spark.read() + .schema(Encoders.bean(Relation.class).schema()) + .json(inputPath + "relation") + .where("relclass IN('" + ModelConstants.IS_SUPPLEMENT_TO + "', '" + + ModelConstants.IS_SUPPLEMENTED_BY + "')") + .selectExpr("source as id", "target"); + + result + .join(supplements, "id") + .join(orcidDnet, orcidDnet.col("id").equalTo(supplements.col("target"))) + .drop("target") + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .parquet(outputPath + e.name() + "_unmatched"); + + }); + + + } + + + +// override def createTemporaryData(graphPath: String, orcidPath: String, targetPath: String): Unit = { +// val relEnc = Encoders.bean(classOf[Relation]) +// +// ModelSupport.entityTypes.asScala +// .filter(e => ModelSupport.isResult(e._1)) +// .foreach(e => { +// val resultType = e._1.name() +// val enc = Encoders.bean(e._2) +// +// val orcidDnet = spark.read +// .load("$graphPath/$resultType") +// .as[Result] +// .map( +// result => +// ( +// result.getId, +// result.getAuthor.asScala.map(a => OrcidAuthor("extract ORCID", a.getSurname, a.getName, a.getFullname, null)) +// ) +// ) +// .where("size(_2) > 0") +// .selectExpr("_1 as id", "_2 as orcid_authors") +// +// val result = +// spark.read.schema(enc.schema).json(s"$graphPath/$resultType").selectExpr("id", "author as graph_authors") +// +// val supplements = spark.read.schema(relEnc.schema).json(s"$graphPath/relation").where("relclass IN('isSupplementedBy', 'isSupplementOf')").selectExpr("source as id", "target") +// +// result +// .join(supplements, Seq("id")) +// .join(orcidDnet, orcidDnet("id") === col("target")) +// .drop("target") +// .write +// .mode(SaveMode.Overwrite) +// .option("compression", "gzip") +// .parquet(s"$targetPath/${resultType}_unmatched") +// }) +// } + + private static OrcidAuthors getOrcidAuthorsList(List authors) { + OrcidAuthors oas = new OrcidAuthors(); + List tmp = authors.stream().map(SparkPrepareAuthorInfo::getOrcidAuthor).collect(Collectors.toList()); + oas.setOrcidAuthorList(tmp); + return oas; + } + + private static OrcidAuthor getOrcidAuthor(Author a){ + return new OrcidAuthor(getOrcid(a),a.getSurname(), a.getName(), a.getFullname(), null); + + } + + private static String getOrcid(Author a){ + if 
(a.getPid().stream().anyMatch(p->p.getQualifier().getClassid().equalsIgnoreCase(ModelConstants.ORCID))) + return a.getPid().stream().filter(p->p.getQualifier().getClassid().equalsIgnoreCase(ModelConstants.ORCID)).findFirst().get().getValue(); + return a.getPid().stream().filter(p->p.getQualifier().getClassid().equalsIgnoreCase(ModelConstants.ORCID_PENDING)).findFirst().get().getValue(); + + } +} From eb83a34f64a36b887d0dc2eba3552d6a0ddc4050 Mon Sep 17 00:00:00 2001 From: Miriam Baglioni Date: Mon, 4 Nov 2024 12:42:11 +0100 Subject: [PATCH 63/73] [OrcidPropagation] alignemnt of property file with new parameters --- .../input_orcidtoresult_parameters.json | 25 +------------------ 1 file changed, 1 insertion(+), 24 deletions(-) diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/input_orcidtoresult_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/input_orcidtoresult_parameters.json index 3cbaa23bb..125a82037 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/input_orcidtoresult_parameters.json +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/input_orcidtoresult_parameters.json @@ -5,12 +5,7 @@ "paramDescription": "the path of the sequencial file to read", "paramRequired": true }, - { - "paramName":"sg", - "paramLongName":"saveGraph", - "paramDescription": "true if the new version of the graph must be saved", - "paramRequired": false - }, + { "paramName": "out", "paramLongName": "outputPath", @@ -22,23 +17,5 @@ "paramLongName": "isSparkSessionManaged", "paramDescription": "true if the spark session is managed, false otherwise", "paramRequired": false - }, - { - "paramName":"tn", - "paramLongName":"resultTableName", - "paramDescription": "the name of the result table we are currently working on", - "paramRequired": true - }, - { - "paramName":"pu", - "paramLongName":"possibleUpdatesPath", - "paramDescription": "the path the the association resultId orcid author list can be found", - "paramRequired": true - }, - { - "paramName":"test", - "paramLongName":"isTest", - "paramDescription": "true if it is executing a test", - "paramRequired": false } ] \ No newline at end of file From da9bbdede4077a9abc435bed06b94257f6caa6f3 Mon Sep 17 00:00:00 2001 From: Miriam Baglioni Date: Tue, 5 Nov 2024 14:11:06 +0100 Subject: [PATCH 64/73] [orcidenrichment] refactoring --- .../author/SparkEnrichWithOrcidAuthors.scala | 87 ++++++++++ .../SparkPrepareAuthorInfo.java | 164 ------------------ .../SparkPropagateOrcidAuthor.java | 100 +++++++++++ .../SparkEnrichGraphWithOrcidAuthors.scala | 92 ++-------- .../orcid/SparkPropagateOrcidAuthors.scala | 63 ------- 5 files changed, 198 insertions(+), 308 deletions(-) create mode 100644 dhp-common/src/main/scala/eu/dnetlib/dhp/common/author/SparkEnrichWithOrcidAuthors.scala delete mode 100644 dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkPrepareAuthorInfo.java create mode 100644 dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkPropagateOrcidAuthor.java delete mode 100644 dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/enrich/orcid/SparkPropagateOrcidAuthors.scala diff --git a/dhp-common/src/main/scala/eu/dnetlib/dhp/common/author/SparkEnrichWithOrcidAuthors.scala 
b/dhp-common/src/main/scala/eu/dnetlib/dhp/common/author/SparkEnrichWithOrcidAuthors.scala new file mode 100644 index 000000000..7e62e2012 --- /dev/null +++ b/dhp-common/src/main/scala/eu/dnetlib/dhp/common/author/SparkEnrichWithOrcidAuthors.scala @@ -0,0 +1,87 @@ +package eu.dnetlib.dhp.common.author + +import eu.dnetlib.dhp.application.AbstractScalaApplication +import eu.dnetlib.dhp.schema.common.ModelSupport +import eu.dnetlib.dhp.utils.{MatchData, ORCIDAuthorEnricher, ORCIDAuthorEnricherResult} +import org.apache.spark.sql._ +import org.apache.spark.sql.functions._ +import org.slf4j.{Logger, LoggerFactory} + +import scala.collection.JavaConverters._ + +abstract class SparkEnrichWithOrcidAuthors(propertyPath: String, args: Array[String], log: Logger) + extends AbstractScalaApplication(propertyPath, args, log: Logger) { + + /** Here all the spark applications runs this method + * where the whole logic of the spark node is defined + */ + override def run(): Unit = { + val graphPath = parser.get("graphPath") + log.info(s"graphPath is '$graphPath'") + val orcidPath = parser.get("orcidPath") + log.info(s"orcidPath is '$orcidPath'") + val targetPath = parser.get("targetPath") + log.info(s"targetPath is '$targetPath'") + val workingDir = parser.get("workingDir") + log.info(s"targetPath is '$workingDir'") + + createTemporaryData(graphPath, orcidPath, workingDir) + analisys(workingDir) + generateGraph(graphPath, workingDir, targetPath) + } + + private def generateGraph(graphPath: String, workingDir: String, targetPath: String): Unit = { + + ModelSupport.entityTypes.asScala + .filter(e => ModelSupport.isResult(e._1)) + .foreach(e => { + val resultType = e._1.name() + val enc = Encoders.bean(e._2) + + val matched = spark.read + .schema(Encoders.bean(classOf[ORCIDAuthorEnricherResult]).schema) + .parquet(s"${workingDir}/${resultType}_matched") + .selectExpr("id", "enriched_author") + + spark.read + .schema(enc.schema) + .json(s"$graphPath/$resultType") + .join(matched, Seq("id"), "left") + .withColumn( + "author", + when(size(col("enriched_author")).gt(0), col("enriched_author")) + .otherwise(col("author")) + ) + .drop("enriched_author") + .write + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(s"${targetPath}/${resultType}") + + }) + + } + + def createTemporaryData(spark: SparkSession, graphPath: String, orcidPath: String, targetPath: String): Unit + + private def analisys(targetPath: String): Unit = { + ModelSupport.entityTypes.asScala + .filter(e => ModelSupport.isResult(e._1)) + .foreach(e => { + val resultType = e._1.name() + + spark.read + .parquet(s"$targetPath/${resultType}_unmatched") + .where("size(graph_authors) > 0") + .as[MatchData](Encoders.bean(classOf[MatchData])) + .map(md => { + ORCIDAuthorEnricher.enrichOrcid(md.id, md.graph_authors, md.orcid_authors) + })(Encoders.bean(classOf[ORCIDAuthorEnricherResult])) + .write + .option("compression", "gzip") + .mode("overwrite") + .parquet(s"$targetPath/${resultType}_matched") + }) + } +} + diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkPrepareAuthorInfo.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkPrepareAuthorInfo.java deleted file mode 100644 index 32337a25b..000000000 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkPrepareAuthorInfo.java +++ /dev/null @@ -1,164 +0,0 @@ -package eu.dnetlib.dhp.orcidtoresultfromsemrel; - -import 
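The new base class above is the heart of the refactoring: run() fixes a three-phase pipeline (prepare, match, rewrite) and leaves only the data preparation abstract, so graph enrichment and semantic-relation propagation can share the matching and rewriting code. Stripped of Spark, the shape is the classic template method; the sketch below is a distilled illustration with invented names, not project code.

abstract class ThreePhaseJob {
  // the fixed pipeline: only the first phase differs between enrichment and propagation
  final def run(): Unit = { prepare(); analyze(); publish() }

  def prepare(): Unit // abstract hook, cf. createTemporaryData

  private def analyze(): Unit = println("match graph authors against ORCID authors") // cf. analisys()
  private def publish(): Unit = println("rewrite the graph with enriched authors")   // cf. generateGraph()
}

class PropagationSketch extends ThreePhaseJob {
  override def prepare(): Unit = println("collect ORCIDs reachable through supplement relations")
}

object TemplateDemo extends App { new PropagationSketch().run() }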
eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.common.ModelSupport; -import eu.dnetlib.dhp.schema.oaf.Author; -import eu.dnetlib.dhp.schema.oaf.Relation; -import eu.dnetlib.dhp.schema.oaf.Result; -import eu.dnetlib.dhp.utils.OrcidAuthor; -import org.apache.commons.io.IOUtils; -import org.apache.spark.SparkConf; -import org.apache.spark.api.java.function.FilterFunction; -import org.apache.spark.api.java.function.MapFunction; -import org.apache.spark.sql.*; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import scala.Tuple2; - - -import java.util.List; -import java.util.Optional; -import java.util.stream.Collectors; - -import static eu.dnetlib.dhp.PropagationConstant.isSparkSessionManaged; -import static eu.dnetlib.dhp.PropagationConstant.removeOutputDir; -import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; - -public class SparkPrepareAuthorInfo { - private static final Logger log = LoggerFactory.getLogger(SparkPrepareAuthorInfo.class); - - public static void main(String[] args) throws Exception { - String jsonConfiguration = IOUtils - .toString( - SparkPrepareAuthorInfo.class - .getResourceAsStream( - "/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/input_orcidtoresult_parameters.json")); - - final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); - parser.parseArgument(args); - - Boolean isSparkSessionManaged = isSparkSessionManaged(parser); - log.info("isSparkSessionManaged: {}", isSparkSessionManaged); - - final String inputPath = parser.get("sourcePath"); - log.info("inputPath: {}", inputPath); - - final String outputPath = parser.get("outputPath"); - log.info("outputPath: {}", outputPath); - - SparkConf conf = new SparkConf(); - - runWithSparkSession( - conf, - isSparkSessionManaged, - spark -> { - removeOutputDir(spark, outputPath); - createTemporaryData(spark, inputPath, outputPath); - - } - ); - } - - private static void createTemporaryData(SparkSession spark, String inputPath, String outputPath) { - ModelSupport.entityTypes - .keySet().stream().filter(ModelSupport::isResult) - .forEach(e -> { - Dataset orcidDnet = spark.read().schema(Encoders.bean(Result.class).schema()) - .json(inputPath + e.name()) - .as(Encoders.bean(Result.class)) - .filter((FilterFunction) r -> r.getAuthor().stream() - .anyMatch(a -> a.getPid() - .stream() - .anyMatch(p -> p.getQualifier().getClassid().equalsIgnoreCase(ModelConstants.ORCID) || - p.getQualifier().getClassid().equalsIgnoreCase(ModelConstants.ORCID_PENDING)))) - .map((MapFunction>) r -> - new Tuple2<>(r.getId(), getOrcidAuthorsList(r.getAuthor())) - , Encoders.tuple(Encoders.STRING(), Encoders.bean(OrcidAuthors.class))) - .selectExpr("_1 as id", "_2 as orcid_authors"); - - Dataset result = spark.read().schema(Encoders.bean(Result.class).schema()) - .json(inputPath + e.name()) - .as(Encoders.bean(Result.class)) - .selectExpr("id", "author as graph_authors"); - - Dataset supplements = spark.read() - .schema(Encoders.bean(Relation.class).schema()) - .json(inputPath + "relation") - .where("relclass IN('" + ModelConstants.IS_SUPPLEMENT_TO + "', '" + - ModelConstants.IS_SUPPLEMENTED_BY + "')") - .selectExpr("source as id", "target"); - - result - .join(supplements, "id") - .join(orcidDnet, orcidDnet.col("id").equalTo(supplements.col("target"))) - .drop("target") - .write() - .mode(SaveMode.Overwrite) - .option("compression", "gzip") - .parquet(outputPath + e.name() + "_unmatched"); - - 
}); - - - } - - - -// override def createTemporaryData(graphPath: String, orcidPath: String, targetPath: String): Unit = { -// val relEnc = Encoders.bean(classOf[Relation]) -// -// ModelSupport.entityTypes.asScala -// .filter(e => ModelSupport.isResult(e._1)) -// .foreach(e => { -// val resultType = e._1.name() -// val enc = Encoders.bean(e._2) -// -// val orcidDnet = spark.read -// .load("$graphPath/$resultType") -// .as[Result] -// .map( -// result => -// ( -// result.getId, -// result.getAuthor.asScala.map(a => OrcidAuthor("extract ORCID", a.getSurname, a.getName, a.getFullname, null)) -// ) -// ) -// .where("size(_2) > 0") -// .selectExpr("_1 as id", "_2 as orcid_authors") -// -// val result = -// spark.read.schema(enc.schema).json(s"$graphPath/$resultType").selectExpr("id", "author as graph_authors") -// -// val supplements = spark.read.schema(relEnc.schema).json(s"$graphPath/relation").where("relclass IN('isSupplementedBy', 'isSupplementOf')").selectExpr("source as id", "target") -// -// result -// .join(supplements, Seq("id")) -// .join(orcidDnet, orcidDnet("id") === col("target")) -// .drop("target") -// .write -// .mode(SaveMode.Overwrite) -// .option("compression", "gzip") -// .parquet(s"$targetPath/${resultType}_unmatched") -// }) -// } - - private static OrcidAuthors getOrcidAuthorsList(List authors) { - OrcidAuthors oas = new OrcidAuthors(); - List tmp = authors.stream().map(SparkPrepareAuthorInfo::getOrcidAuthor).collect(Collectors.toList()); - oas.setOrcidAuthorList(tmp); - return oas; - } - - private static OrcidAuthor getOrcidAuthor(Author a){ - return new OrcidAuthor(getOrcid(a),a.getSurname(), a.getName(), a.getFullname(), null); - - } - - private static String getOrcid(Author a){ - if (a.getPid().stream().anyMatch(p->p.getQualifier().getClassid().equalsIgnoreCase(ModelConstants.ORCID))) - return a.getPid().stream().filter(p->p.getQualifier().getClassid().equalsIgnoreCase(ModelConstants.ORCID)).findFirst().get().getValue(); - return a.getPid().stream().filter(p->p.getQualifier().getClassid().equalsIgnoreCase(ModelConstants.ORCID_PENDING)).findFirst().get().getValue(); - - } -} diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkPropagateOrcidAuthor.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkPropagateOrcidAuthor.java new file mode 100644 index 000000000..7e17b6a7b --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkPropagateOrcidAuthor.java @@ -0,0 +1,100 @@ +package eu.dnetlib.dhp.orcidtoresultfromsemrel; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.common.author.SparkEnrichWithOrcidAuthors; +import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.common.ModelSupport; +import eu.dnetlib.dhp.schema.oaf.Author; +import eu.dnetlib.dhp.schema.oaf.Relation; +import eu.dnetlib.dhp.schema.oaf.Result; +import eu.dnetlib.dhp.utils.OrcidAuthor; +import org.apache.commons.io.IOUtils; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.function.FilterFunction; +import org.apache.spark.api.java.function.MapFunction; +import org.apache.spark.sql.*; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import scala.Tuple2; + + +import java.util.List; +import java.util.stream.Collectors; + + +public class SparkPropagateOrcidAuthor extends SparkEnrichWithOrcidAuthors { + private static final Logger log = 
LoggerFactory.getLogger(SparkPropagateOrcidAuthor.class); + + public SparkPropagateOrcidAuthor(String propertyPath, String[] args, Logger log) { + super(propertyPath, args, log); + } + + public static void main(String[] args) throws Exception { + + // Create instance and run the Spark application + SparkPropagateOrcidAuthor app = new SparkPropagateOrcidAuthor("/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/input_orcidtoresult_parameters.json", args, log); + app.run(); + + } + + private static OrcidAuthors getOrcidAuthorsList(List authors) { + OrcidAuthors oas = new OrcidAuthors(); + List tmp = authors.stream().map(SparkPropagateOrcidAuthor::getOrcidAuthor).collect(Collectors.toList()); + oas.setOrcidAuthorList(tmp); + return oas; + } + + private static OrcidAuthor getOrcidAuthor(Author a){ + return new OrcidAuthor(getOrcid(a),a.getSurname(), a.getName(), a.getFullname(), null); + + } + + private static String getOrcid(Author a){ + if (a.getPid().stream().anyMatch(p->p.getQualifier().getClassid().equalsIgnoreCase(ModelConstants.ORCID))) + return a.getPid().stream().filter(p->p.getQualifier().getClassid().equalsIgnoreCase(ModelConstants.ORCID)).findFirst().get().getValue(); + return a.getPid().stream().filter(p->p.getQualifier().getClassid().equalsIgnoreCase(ModelConstants.ORCID_PENDING)).findFirst().get().getValue(); + + } + + @Override + public void createTemporaryData(SparkSession spark, String graphPath, String orcidPath, String targetPath) { + ModelSupport.entityTypes + .keySet().stream().filter(ModelSupport::isResult) + .forEach(e -> { + Dataset orcidDnet = spark.read().schema(Encoders.bean(Result.class).schema()) + .json(graphPath + e.name()) + .as(Encoders.bean(Result.class)) + .filter((FilterFunction) r -> r.getAuthor().stream() + .anyMatch(a -> a.getPid() + .stream() + .anyMatch(p -> p.getQualifier().getClassid().equalsIgnoreCase(ModelConstants.ORCID) || + p.getQualifier().getClassid().equalsIgnoreCase(ModelConstants.ORCID_PENDING)))) + .map((MapFunction>) r -> + new Tuple2<>(r.getId(), getOrcidAuthorsList(r.getAuthor())) + , Encoders.tuple(Encoders.STRING(), Encoders.bean(OrcidAuthors.class))) + .selectExpr("_1 as target", "_2 as orcid_authors"); + + Dataset result = spark.read().schema(Encoders.bean(Result.class).schema()) + .json(graphPath + e.name()) + .as(Encoders.bean(Result.class)) + .selectExpr("id", "author as graph_authors"); + + Dataset supplements = spark.read() + .schema(Encoders.bean(Relation.class).schema()) + .json(graphPath + "relation") + .where("relclass IN('" + ModelConstants.IS_SUPPLEMENT_TO + "', '" + + ModelConstants.IS_SUPPLEMENTED_BY + "')") + .selectExpr("source as id", "target"); + + result + .join(supplements, "id") + .join(orcidDnet, "target") + .drop("target") + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .parquet(targetPath + e.name() + "_unmatched"); + + }); + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/enrich/orcid/SparkEnrichGraphWithOrcidAuthors.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/enrich/orcid/SparkEnrichGraphWithOrcidAuthors.scala index e615f02d8..f10a9e56f 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/enrich/orcid/SparkEnrichGraphWithOrcidAuthors.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/enrich/orcid/SparkEnrichGraphWithOrcidAuthors.scala @@ -1,8 +1,7 @@ package eu.dnetlib.dhp.enrich.orcid -import eu.dnetlib.dhp.application.AbstractScalaApplication +import 
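A detail of the class above worth isolating is the encoder plumbing: the typed map step produces a (String, OrcidAuthors) pair, which needs an explicit tuple-of-bean encoder, and the tuple fields then surface as _1 and _2 columns, which is why selectExpr renames them right away (a later patch in this series even projects the bean field directly, as _2.orcidAuthorList). A minimal sketch with a made-up stand-in bean:

import org.apache.spark.sql.{Encoders, SparkSession}
import scala.beans.BeanProperty

// stand-in bean; the real code uses OrcidAuthors, which wraps a list of OrcidAuthor
class PidBox { @BeanProperty var value: String = _ }

object EncoderSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").appName("enc").getOrCreate()

    val enc = Encoders.tuple(Encoders.STRING, Encoders.bean(classOf[PidBox]))
    val box = new PidBox
    box.setValue("0000-0002-1825-0097") // fabricated ORCID-style value

    val ds = spark.createDataset(Seq(("r1", box)))(enc)
    ds.selectExpr("_1 as target", "_2.value as orcid").show()

    spark.stop()
  }
}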
eu.dnetlib.dhp.common.author.SparkEnrichWithOrcidAuthors import eu.dnetlib.dhp.schema.common.ModelSupport -import eu.dnetlib.dhp.utils.{MatchData, ORCIDAuthorEnricher, ORCIDAuthorEnricherResult} import org.apache.spark.sql._ import org.apache.spark.sql.functions._ import org.slf4j.{Logger, LoggerFactory} @@ -10,59 +9,9 @@ import org.slf4j.{Logger, LoggerFactory} import scala.collection.JavaConverters._ class SparkEnrichGraphWithOrcidAuthors(propertyPath: String, args: Array[String], log: Logger) - extends AbstractScalaApplication(propertyPath, args, log: Logger) { + extends SparkEnrichWithOrcidAuthors(propertyPath, args, log: Logger) { - /** Here all the spark applications runs this method - * where the whole logic of the spark node is defined - */ - override def run(): Unit = { - val graphPath = parser.get("graphPath") - log.info(s"graphPath is '$graphPath'") - val orcidPath = parser.get("orcidPath") - log.info(s"orcidPath is '$orcidPath'") - val targetPath = parser.get("targetPath") - log.info(s"targetPath is '$targetPath'") - val workingDir = parser.get("workingDir") - log.info(s"targetPath is '$workingDir'") - - createTemporaryData(graphPath, orcidPath, workingDir) - analisys(workingDir) - generateGraph(graphPath, workingDir, targetPath) - } - - private def generateGraph(graphPath: String, workingDir: String, targetPath: String): Unit = { - - ModelSupport.entityTypes.asScala - .filter(e => ModelSupport.isResult(e._1)) - .foreach(e => { - val resultType = e._1.name() - val enc = Encoders.bean(e._2) - - val matched = spark.read - .schema(Encoders.bean(classOf[ORCIDAuthorEnricherResult]).schema) - .parquet(s"${workingDir}/${resultType}_matched") - .selectExpr("id", "enriched_author") - - spark.read - .schema(enc.schema) - .json(s"$graphPath/$resultType") - .join(matched, Seq("id"), "left") - .withColumn( - "author", - when(size(col("enriched_author")).gt(0), col("enriched_author")) - .otherwise(col("author")) - ) - .drop("enriched_author") - .write - .mode(SaveMode.Overwrite) - .option("compression", "gzip") - .json(s"${targetPath}/${resultType}") - - }) - - } - - def createTemporaryData(graphPath: String, orcidPath: String, targetPath: String): Unit = { + override def createTemporaryData(spark:SparkSession, graphPath: String, orcidPath: String, targetPath: String): Unit = { val orcidAuthors = spark.read.load(s"$orcidPath/Authors").select("orcid", "familyName", "givenName", "creditName", "otherNames") @@ -131,35 +80,16 @@ class SparkEnrichGraphWithOrcidAuthors(propertyPath: String, args: Array[String] orcidWorksWithAuthors.unpersist() } - - private def analisys(targetPath: String): Unit = { - ModelSupport.entityTypes.asScala - .filter(e => ModelSupport.isResult(e._1)) - .foreach(e => { - val resultType = e._1.name() - - spark.read - .parquet(s"$targetPath/${resultType}_unmatched") - .where("size(graph_authors) > 0") - .as[MatchData](Encoders.bean(classOf[MatchData])) - .map(md => { - ORCIDAuthorEnricher.enrichOrcid(md.id, md.graph_authors, md.orcid_authors) - })(Encoders.bean(classOf[ORCIDAuthorEnricherResult])) - .write - .option("compression", "gzip") - .mode("overwrite") - .parquet(s"$targetPath/${resultType}_matched") - }) - } } -object SparkEnrichGraphWithOrcidAuthors { + object SparkEnrichGraphWithOrcidAuthors { - val log: Logger = LoggerFactory.getLogger(SparkEnrichGraphWithOrcidAuthors.getClass) + val log: Logger = LoggerFactory.getLogger(SparkEnrichGraphWithOrcidAuthors.getClass) - def main(args: Array[String]): Unit = { - new 
SparkEnrichGraphWithOrcidAuthors("/eu/dnetlib/dhp/enrich/orcid/enrich_graph_orcid_parameters.json", args, log) - .initialize() - .run() + def main(args: Array[String]): Unit = { + new SparkEnrichGraphWithOrcidAuthors("/eu/dnetlib/dhp/enrich/orcid/enrich_graph_orcid_parameters.json", args, log) + .initialize() + .run() + } } -} + diff --git a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/enrich/orcid/SparkPropagateOrcidAuthors.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/enrich/orcid/SparkPropagateOrcidAuthors.scala deleted file mode 100644 index 2511830a2..000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/enrich/orcid/SparkPropagateOrcidAuthors.scala +++ /dev/null @@ -1,63 +0,0 @@ -package eu.dnetlib.dhp.enrich.orcid - -import eu.dnetlib.dhp.schema.common.ModelSupport -import eu.dnetlib.dhp.schema.oaf.{Relation, Result} -import eu.dnetlib.dhp.utils.OrcidAuthor -import org.apache.spark.sql._ -import org.apache.spark.sql.functions._ -import org.slf4j.{Logger, LoggerFactory} - -import scala.collection.JavaConverters._ - -class SparkPropagateOrcidAuthors(propertyPath: String, args: Array[String], log: Logger) - extends SparkEnrichGraphWithOrcidAuthors(propertyPath, args, log: Logger) { - - override def createTemporaryData(graphPath: String, orcidPath: String, targetPath: String): Unit = { - val relEnc = Encoders.bean(classOf[Relation]) - - ModelSupport.entityTypes.asScala - .filter(e => ModelSupport.isResult(e._1)) - .foreach(e => { - val resultType = e._1.name() - val enc = Encoders.bean(e._2) - - val orcidDnet = spark.read - .load("$graphPath/$resultType") - .as[Result] - .map( - result => - ( - result.getId, - result.getAuthor.asScala.map(a => OrcidAuthor("extract ORCID", a.getSurname, a.getName, a.getFullname, null)) - ) - ) - .where("size(_2) > 0") - .selectExpr("_1 as id", "_2 as orcid_authors") - - val result = - spark.read.schema(enc.schema).json(s"$graphPath/$resultType").selectExpr("id", "author as graph_authors") - - val supplements = spark.read.schema(relEnc.schema).json(s"$graphPath/relation").where("relclass IN('isSupplementedBy', 'isSupplementOf')").selectExpr("source as id", "target") - - result - .join(supplements, Seq("id")) - .join(orcidDnet, orcidDnet("id") === col("target")) - .drop("target") - .write - .mode(SaveMode.Overwrite) - .option("compression", "gzip") - .parquet(s"$targetPath/${resultType}_unmatched") - }) - } -} - -object SparkPropagateOrcidAuthors { - - val log: Logger = LoggerFactory.getLogger(SparkPropagateOrcidAuthors.getClass) - - def main(args: Array[String]): Unit = { - new SparkPropagateOrcidAuthors("/eu/dnetlib/dhp/enrich/orcid/enrich_graph_orcid_parameters.json", args, log) - .initialize() - .run() - } -} From 1b4bbb2691e2cced7ca81f42e7a1b7fed9befc89 Mon Sep 17 00:00:00 2001 From: Miriam Baglioni Date: Wed, 6 Nov 2024 10:16:54 +0100 Subject: [PATCH 65/73] [orcidenrichment] refactoring --- .../dhp/common/enrichment/Constants.java | 5 +++ .../author/SparkEnrichWithOrcidAuthors.scala | 20 +++++++---- .../dhp/utils/ORCIDAuthorEnricher.scala | 34 ++++++++++++++----- .../eu/dnetlib/dhp/PropagationConstant.java | 4 ++- .../SparkPropagateOrcidAuthor.java | 15 +++++--- 5 files changed, 56 insertions(+), 22 deletions(-) create mode 100644 dhp-common/src/main/java/eu/dnetlib/dhp/common/enrichment/Constants.java diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/enrichment/Constants.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/enrichment/Constants.java new file mode 100644 index 
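After the deduplication above, the left-join overwrite rule lives in exactly one place, the base class generateGraph. Its behavior on unmatched records is worth spelling out: rows with no match carry a null enriched_author, the when condition is then not satisfied, and the original author list is kept. A toy run with invented rows:

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.{col, size, when}

object OverwriteSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").appName("ow").getOrCreate()
    import spark.implicits._

    val graph   = Seq(("r1", Seq("A. Author")), ("r2", Seq("B. Author"))).toDF("id", "author")
    val matched = Seq(("r1", Seq("A. Author [orcid]"))).toDF("id", "enriched_author")

    graph
      .join(matched, Seq("id"), "left")
      .withColumn(
        "author",
        when(size(col("enriched_author")).gt(0), col("enriched_author"))
          .otherwise(col("author"))
      )
      .drop("enriched_author")
      .show(false) // r1 takes the enriched list, r2 keeps its original one

    spark.stop()
  }
}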
000000000..df433dddb --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/enrichment/Constants.java @@ -0,0 +1,5 @@ +package eu.dnetlib.dhp.common.enrichment; + +public class Constants { + public static final String PROPAGATION_DATA_INFO_TYPE = "propagation"; +} diff --git a/dhp-common/src/main/scala/eu/dnetlib/dhp/common/author/SparkEnrichWithOrcidAuthors.scala b/dhp-common/src/main/scala/eu/dnetlib/dhp/common/author/SparkEnrichWithOrcidAuthors.scala index 7e62e2012..47d563649 100644 --- a/dhp-common/src/main/scala/eu/dnetlib/dhp/common/author/SparkEnrichWithOrcidAuthors.scala +++ b/dhp-common/src/main/scala/eu/dnetlib/dhp/common/author/SparkEnrichWithOrcidAuthors.scala @@ -1,11 +1,12 @@ package eu.dnetlib.dhp.common.author import eu.dnetlib.dhp.application.AbstractScalaApplication -import eu.dnetlib.dhp.schema.common.ModelSupport +import eu.dnetlib.dhp.schema.common.{ModelConstants, ModelSupport} import eu.dnetlib.dhp.utils.{MatchData, ORCIDAuthorEnricher, ORCIDAuthorEnricherResult} import org.apache.spark.sql._ import org.apache.spark.sql.functions._ import org.slf4j.{Logger, LoggerFactory} +import eu.dnetlib.dhp.common.enrichment.Constants.PROPAGATION_DATA_INFO_TYPE import scala.collection.JavaConverters._ @@ -24,13 +25,18 @@ abstract class SparkEnrichWithOrcidAuthors(propertyPath: String, args: Array[Str log.info(s"targetPath is '$targetPath'") val workingDir = parser.get("workingDir") log.info(s"targetPath is '$workingDir'") + val classid = Option(parser.get("matchingSource")).map(_=>ModelConstants.ORCID_PENDING).getOrElse(ModelConstants.ORCID) - createTemporaryData(graphPath, orcidPath, workingDir) - analisys(workingDir) - generateGraph(graphPath, workingDir, targetPath) + log.info(s"classid is '$classid'") + val provenance = Option(parser.get("matchingSource")).map(_=>PROPAGATION_DATA_INFO_TYPE).getOrElse("ORCID_ENRICHMENT") + log.info(s"targetPath is '$workingDir'") + + createTemporaryData(spark, graphPath, orcidPath, workingDir) + analisys(workingDir,classid,provenance) + generateGraph(spark, graphPath, workingDir, targetPath) } - private def generateGraph(graphPath: String, workingDir: String, targetPath: String): Unit = { + private def generateGraph(spark: SparkSession, graphPath: String, workingDir: String, targetPath: String): Unit = { ModelSupport.entityTypes.asScala .filter(e => ModelSupport.isResult(e._1)) @@ -64,7 +70,7 @@ abstract class SparkEnrichWithOrcidAuthors(propertyPath: String, args: Array[Str def createTemporaryData(spark: SparkSession, graphPath: String, orcidPath: String, targetPath: String): Unit - private def analisys(targetPath: String): Unit = { + private def analisys(targetPath: String, classid:String, provenance:String): Unit = { ModelSupport.entityTypes.asScala .filter(e => ModelSupport.isResult(e._1)) .foreach(e => { @@ -75,7 +81,7 @@ abstract class SparkEnrichWithOrcidAuthors(propertyPath: String, args: Array[Str .where("size(graph_authors) > 0") .as[MatchData](Encoders.bean(classOf[MatchData])) .map(md => { - ORCIDAuthorEnricher.enrichOrcid(md.id, md.graph_authors, md.orcid_authors) + ORCIDAuthorEnricher.enrichOrcid(md.id, md.graph_authors, md.orcid_authors, classid, provenance) })(Encoders.bean(classOf[ORCIDAuthorEnricherResult])) .write .option("compression", "gzip") diff --git a/dhp-common/src/main/scala/eu/dnetlib/dhp/utils/ORCIDAuthorEnricher.scala b/dhp-common/src/main/scala/eu/dnetlib/dhp/utils/ORCIDAuthorEnricher.scala index 6a087b1de..ff9715c07 100644 --- 
a/dhp-common/src/main/scala/eu/dnetlib/dhp/utils/ORCIDAuthorEnricher.scala +++ b/dhp-common/src/main/scala/eu/dnetlib/dhp/utils/ORCIDAuthorEnricher.scala @@ -46,7 +46,9 @@ object ORCIDAuthorEnricher extends Serializable { def enrichOrcid( id: String, graph_authors: java.util.List[Author], - orcid_authors: java.util.List[OrcidAuthor] + orcid_authors: java.util.List[OrcidAuthor], + classid:String, + provenance:String ): ORCIDAuthorEnricherResult = { // Author enriching strategy: // 1) create a copy of graph author list in unmatched_authors @@ -64,7 +66,9 @@ object ORCIDAuthorEnricher extends Serializable { orcid_authors, (author, orcid) => AuthorMatchers.matchEqualsIgnoreCase(author.getFullname, orcid.givenName + " " + orcid.familyName), - "fullName" + "fullName", + classid, + provenance ) ++ // Look after exact reversed fullname match, reconstruct ORCID fullname as familyName + givenName extractAndEnrichMatches( @@ -72,7 +76,9 @@ object ORCIDAuthorEnricher extends Serializable { orcid_authors, (author, orcid) => AuthorMatchers.matchEqualsIgnoreCase(author.getFullname, orcid.familyName + " " + orcid.givenName), - "reversedFullName" + "reversedFullName", + classid, + provenance ) ++ // split author names in tokens, order the tokens, then check for matches of full tokens or abbreviations extractAndEnrichMatches( @@ -81,14 +87,18 @@ object ORCIDAuthorEnricher extends Serializable { (author, orcid) => AuthorMatchers .matchOrderedTokenAndAbbreviations(author.getFullname, orcid.givenName + " " + orcid.familyName), - "orderedTokens" + "orderedTokens", + classid, + provenance ) ++ // look after exact matches of ORCID creditName extractAndEnrichMatches( unmatched_authors, orcid_authors, (author, orcid) => AuthorMatchers.matchEqualsIgnoreCase(author.getFullname, orcid.creditName), - "creditName" + "creditName", + classid, + provenance ) ++ // look after exact matches in ORCID otherNames extractAndEnrichMatches( @@ -96,7 +106,9 @@ object ORCIDAuthorEnricher extends Serializable { orcid_authors, (author, orcid) => orcid.otherNames != null && AuthorMatchers.matchOtherNames(author.getFullname, orcid.otherNames.asScala), - "otherNames" + "otherNames", + classid, + provenance ) } @@ -107,7 +119,9 @@ object ORCIDAuthorEnricher extends Serializable { graph_authors: java.util.List[Author], orcid_authors: java.util.List[OrcidAuthor], matchingFunc: (Author, OrcidAuthor) => Boolean, - matchName: String + matchName: String, + classid:String, + provenance : String ) = { val matched = scala.collection.mutable.ArrayBuffer.empty[MatchedAuthors] @@ -131,10 +145,12 @@ object ORCIDAuthorEnricher extends Serializable { author.setPid(new util.ArrayList[StructuredProperty]()) } - val orcidPID = OafUtils.createSP(orcid.orcid, ModelConstants.ORCID, ModelConstants.ORCID) + val orcidPID = OafUtils.createSP(orcid.orcid, classid, classid) + //val orcidPID = OafUtils.createSP(orcid.orcid, ModelConstants.ORCID, ModelConstants.ORCID) orcidPID.setDataInfo(OafUtils.generateDataInfo()) orcidPID.getDataInfo.setProvenanceaction( - OafUtils.createQualifier("ORCID_ENRICHMENT", "ORCID_ENRICHMENT") + //OafUtils.createQualifier("ORCID_ENRICHMENT", "ORCID_ENRICHMENT") + OafUtils.createQualifier(provenance, provenance) ) author.getPid.add(orcidPID) diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java index 7c4186663..8dd6e38ba 100644 --- 
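To see what the classid/provenance threading above buys, it helps to isolate the stamping step that extractAndEnrichMatches performs on each matched author. Per the run() logic above, two configurations are possible: plain enrichment stamps ModelConstants.ORCID with provenance ORCID_ENRICHMENT, while propagation (matchingSource set) stamps ModelConstants.ORCID_PENDING with provenance "propagation". The sketch below reuses the project helpers visible in the patch; the author instance and the ORCID value are fabricated, and it assumes the usual bean accessors on Author.

import java.util

import eu.dnetlib.dhp.schema.common.ModelConstants
import eu.dnetlib.dhp.schema.oaf.{Author, StructuredProperty}
import eu.dnetlib.dhp.schema.sx.OafUtils

object PidStampSketch {
  def stamp(author: Author, orcid: String, classid: String, provenance: String): Author = {
    if (author.getPid == null) author.setPid(new util.ArrayList[StructuredProperty]())
    val pid = OafUtils.createSP(orcid, classid, classid)
    pid.setDataInfo(OafUtils.generateDataInfo())
    pid.getDataInfo.setProvenanceaction(OafUtils.createQualifier(provenance, provenance))
    author.getPid.add(pid)
    author
  }

  def main(args: Array[String]): Unit = {
    val a = new Author
    a.setFullname("Jane Doe")
    stamp(a, "0000-0002-1825-0097", ModelConstants.ORCID_PENDING, "propagation")
    println(a.getPid.get(0).getValue) // prints the stamped identifier
  }
}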
a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java @@ -21,6 +21,8 @@ import eu.dnetlib.dhp.schema.oaf.DataInfo; import eu.dnetlib.dhp.schema.oaf.Qualifier; import eu.dnetlib.dhp.schema.oaf.Relation; +import static eu.dnetlib.dhp.common.enrichment.Constants.PROPAGATION_DATA_INFO_TYPE; + public class PropagationConstant { private PropagationConstant() { @@ -46,7 +48,7 @@ public class PropagationConstant { public static final String INSTITUTIONAL_REPO_TYPE = "institutional"; - public static final String PROPAGATION_DATA_INFO_TYPE = "propagation"; + //public static final String PROPAGATION_DATA_INFO_TYPE = "propagation"; public static final String TRUE = "true"; diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkPropagateOrcidAuthor.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkPropagateOrcidAuthor.java index 7e17b6a7b..9f7b2f29e 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkPropagateOrcidAuthor.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkPropagateOrcidAuthor.java @@ -8,8 +8,6 @@ import eu.dnetlib.dhp.schema.oaf.Author; import eu.dnetlib.dhp.schema.oaf.Relation; import eu.dnetlib.dhp.schema.oaf.Result; import eu.dnetlib.dhp.utils.OrcidAuthor; -import org.apache.commons.io.IOUtils; -import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.FilterFunction; import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.sql.*; @@ -19,6 +17,8 @@ import scala.Tuple2; import java.util.List; +import java.util.Objects; +import java.util.Optional; import java.util.stream.Collectors; @@ -39,20 +39,25 @@ public class SparkPropagateOrcidAuthor extends SparkEnrichWithOrcidAuthors { private static OrcidAuthors getOrcidAuthorsList(List authors) { OrcidAuthors oas = new OrcidAuthors(); - List tmp = authors.stream().map(SparkPropagateOrcidAuthor::getOrcidAuthor).collect(Collectors.toList()); + List tmp = authors.stream().map(SparkPropagateOrcidAuthor::getOrcidAuthor) + .filter(Objects::nonNull).collect(Collectors.toList()); oas.setOrcidAuthorList(tmp); return oas; } private static OrcidAuthor getOrcidAuthor(Author a){ - return new OrcidAuthor(getOrcid(a),a.getSurname(), a.getName(), a.getFullname(), null); + return Optional.ofNullable(getOrcid(a)) + .map(orcid -> new OrcidAuthor(orcid,a.getSurname(), a.getName(), a.getFullname(), null)) + .orElse(null); } private static String getOrcid(Author a){ if (a.getPid().stream().anyMatch(p->p.getQualifier().getClassid().equalsIgnoreCase(ModelConstants.ORCID))) return a.getPid().stream().filter(p->p.getQualifier().getClassid().equalsIgnoreCase(ModelConstants.ORCID)).findFirst().get().getValue(); - return a.getPid().stream().filter(p->p.getQualifier().getClassid().equalsIgnoreCase(ModelConstants.ORCID_PENDING)).findFirst().get().getValue(); + if (a.getPid().stream().anyMatch(p->p.getQualifier().getClassid().equalsIgnoreCase(ModelConstants.ORCID_PENDING))) + return a.getPid().stream().filter(p->p.getQualifier().getClassid().equalsIgnoreCase(ModelConstants.ORCID_PENDING)).findFirst().get().getValue(); + return null; } From f9531e04066c96ffe7daed56b782963d24fad1f6 Mon Sep 17 00:00:00 2001 From: Miriam Baglioni Date: Wed, 6 Nov 2024 14:15:34 +0100 Subject: [PATCH 66/73] [orcidenrichment] refactoring --- 
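The null-safe rewrite of getOrcid a few hunks above is the substantive fix in that patch: the original pair of findFirst().get() calls could throw for an author carrying neither PID, since only one author per result is guaranteed to have one. The precedence, a confirmed ORCID first with ORCID_PENDING as fallback, reads naturally with Option; this is an illustrative Scala rendering, not code from the patch.

import eu.dnetlib.dhp.schema.common.ModelConstants
import eu.dnetlib.dhp.schema.oaf.Author

import scala.collection.JavaConverters._

object GetOrcidSketch {
  private def pidOf(a: Author, classid: String): Option[String] =
    Option(a.getPid).toSeq
      .flatMap(_.asScala)
      .find(p => p.getQualifier.getClassid.equalsIgnoreCase(classid))
      .map(_.getValue)

  // None when the author carries neither a confirmed nor a pending ORCID
  def orcidOf(a: Author): Option[String] =
    pidOf(a, ModelConstants.ORCID).orElse(pidOf(a, ModelConstants.ORCID_PENDING))
}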
.../input_orcidtoresult_parameters.json | 23 +- .../oozie_app/workflow.xml | 247 +----------------- 2 files changed, 26 insertions(+), 244 deletions(-) diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/input_orcidtoresult_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/input_orcidtoresult_parameters.json index 125a82037..bae6dedc5 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/input_orcidtoresult_parameters.json +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/input_orcidtoresult_parameters.json @@ -1,21 +1,32 @@ [ { "paramName":"s", - "paramLongName":"sourcePath", + "paramLongName":"graphPath", "paramDescription": "the path of the sequencial file to read", "paramRequired": true }, { "paramName": "out", - "paramLongName": "outputPath", + "paramLongName": "targetPath", "paramDescription": "the path used to store temporary output files", "paramRequired": true - }, + }, { + "paramName": "o", + "paramLongName": "orcidPath", + "paramDescription": "the path used to store temporary output files", + "paramRequired": true +}, { + "paramName": "w", + "paramLongName": "workingDir", + "paramDescription": "the path used to store temporary output files", + "paramRequired": true +}, { - "paramName": "ssm", - "paramLongName": "isSparkSessionManaged", - "paramDescription": "true if the spark session is managed, false otherwise", + "paramName": "m", + "paramLongName": "matchingSource", + "paramDescription": "the path used to store temporary output files", "paramRequired": false } + ] \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/oozie_app/workflow.xml index 8eaa79c53..211ab0200 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/oozie_app/workflow.xml @@ -92,21 +92,14 @@ - + - - - - - - - - + yarn cluster ORCIDPropagation-PreparePhase1-Publications - eu.dnetlib.dhp.orcidtoresultfromsemrel.PrepareResultOrcidAssociationStep1 + eu.dnetlib.dhp.orcidtoresultfromsemrel.SparkPropagateOrcidAuthor dhp-enrichment-${projectVersion}.jar --executor-cores=${sparkExecutorCores} @@ -119,239 +112,17 @@ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.shuffle.partitions=8000 - --sourcePath${sourcePath} - --hive_metastore_uris${hive_metastore_uris} - --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication - --outputPath${workingDir}/orcid/targetOrcidAssoc - --allowedsemrels${allowedsemrels} + --graphPath${sourcePath}/ + --orcidPath${sourcePath}/ + --workingDir${workingDir}/ + --targetPath${outputPath}/ + --matchingSourcegraph - + - - - yarn - cluster - ORCIDPropagation-PreparePhase1-Dataset - eu.dnetlib.dhp.orcidtoresultfromsemrel.PrepareResultOrcidAssociationStep1 - dhp-enrichment-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.executor.memoryOverhead=${sparkExecutorMemory} - --conf 
spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - - --sourcePath${sourcePath} - --hive_metastore_uris${hive_metastore_uris} - --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset - --outputPath${workingDir}/orcid/targetOrcidAssoc - --allowedsemrels${allowedsemrels} - - - - - - - yarn - cluster - ORCIDPropagation-PreparePhase1-ORP - eu.dnetlib.dhp.orcidtoresultfromsemrel.PrepareResultOrcidAssociationStep1 - dhp-enrichment-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.executor.memoryOverhead=${sparkExecutorMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - - --sourcePath${sourcePath} - --hive_metastore_uris${hive_metastore_uris} - --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct - --outputPath${workingDir}/orcid/targetOrcidAssoc - --allowedsemrels${allowedsemrels} - - - - - - - - yarn - cluster - ORCIDPropagation-PreparePhase1-Software - eu.dnetlib.dhp.orcidtoresultfromsemrel.PrepareResultOrcidAssociationStep1 - dhp-enrichment-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.executor.memoryOverhead=${sparkExecutorMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - - --sourcePath${sourcePath} - --hive_metastore_uris${hive_metastore_uris} - --resultTableNameeu.dnetlib.dhp.schema.oaf.Software - --outputPath${workingDir}/orcid/targetOrcidAssoc - --allowedsemrels${allowedsemrels} - - - - - - - - - - yarn - cluster - ORCIDPropagation-PreparePhase2 - eu.dnetlib.dhp.orcidtoresultfromsemrel.PrepareResultOrcidAssociationStep2 - dhp-enrichment-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.executor.memoryOverhead=${sparkExecutorMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - - --sourcePath${workingDir}/orcid/targetOrcidAssoc - --outputPath${workingDir}/orcid/mergedOrcidAssoc - - - - - - - - - - - - - - - yarn - cluster - ORCIDPropagation-Publication - eu.dnetlib.dhp.orcidtoresultfromsemrel.SparkOrcidToResultFromSemRelJob - dhp-enrichment-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.executor.memoryOverhead=${sparkExecutorMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf 
spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.shuffle.partitions=15000 - - --possibleUpdatesPath${workingDir}/orcid/mergedOrcidAssoc - --sourcePath${sourcePath}/publication - --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication - --outputPath${outputPath}/publication - - - - - - - - yarn - cluster - ORCIDPropagation-Dataset - eu.dnetlib.dhp.orcidtoresultfromsemrel.SparkOrcidToResultFromSemRelJob - dhp-enrichment-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.executor.memoryOverhead=${sparkExecutorMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.shuffle.partitions=8000 - - --possibleUpdatesPath${workingDir}/orcid/mergedOrcidAssoc - --sourcePath${sourcePath}/dataset - --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset - --outputPath${outputPath}/dataset - - - - - - - - yarn - cluster - ORCIDPropagation-ORP - eu.dnetlib.dhp.orcidtoresultfromsemrel.SparkOrcidToResultFromSemRelJob - dhp-enrichment-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.executor.memoryOverhead=${sparkExecutorMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.shuffle.partitions=8000 - - --possibleUpdatesPath${workingDir}/orcid/mergedOrcidAssoc - --sourcePath${sourcePath}/otherresearchproduct - --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct - --outputPath${outputPath}/otherresearchproduct - - - - - - - - yarn - cluster - ORCIDPropagation-Software - eu.dnetlib.dhp.orcidtoresultfromsemrel.SparkOrcidToResultFromSemRelJob - dhp-enrichment-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.executor.memoryOverhead=${sparkExecutorMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.shuffle.partitions=4000 - - --possibleUpdatesPath${workingDir}/orcid/mergedOrcidAssoc - --sourcePath${sourcePath}/software - --resultTableNameeu.dnetlib.dhp.schema.oaf.Software - --outputPath${outputPath}/software - - - - - - From 0a0f820dc7589a698cef86c907eb0c3562f077b6 Mon Sep 17 00:00:00 2001 From: Miriam Baglioni Date: Wed, 6 Nov 2024 16:36:34 +0100 Subject: [PATCH 67/73] [orcidenrichment] fixing issue --- .../SparkPropagateOrcidAuthor.java | 10 +++++----- .../orcidtoresultfromsemrel/oozie_app/workflow.xml | 8 ++++---- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkPropagateOrcidAuthor.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkPropagateOrcidAuthor.java index 9f7b2f29e..69aadd108 100644 --- 
a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkPropagateOrcidAuthor.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkPropagateOrcidAuthor.java @@ -33,7 +33,7 @@ public class SparkPropagateOrcidAuthor extends SparkEnrichWithOrcidAuthors { // Create instance and run the Spark application SparkPropagateOrcidAuthor app = new SparkPropagateOrcidAuthor("/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/input_orcidtoresult_parameters.json", args, log); - app.run(); + app.initialize().run(); } @@ -67,7 +67,7 @@ public class SparkPropagateOrcidAuthor extends SparkEnrichWithOrcidAuthors { .keySet().stream().filter(ModelSupport::isResult) .forEach(e -> { Dataset orcidDnet = spark.read().schema(Encoders.bean(Result.class).schema()) - .json(graphPath + e.name()) + .json(graphPath + "/"+ e.name()) .as(Encoders.bean(Result.class)) .filter((FilterFunction) r -> r.getAuthor().stream() .anyMatch(a -> a.getPid() @@ -80,13 +80,13 @@ public class SparkPropagateOrcidAuthor extends SparkEnrichWithOrcidAuthors { .selectExpr("_1 as target", "_2 as orcid_authors"); Dataset result = spark.read().schema(Encoders.bean(Result.class).schema()) - .json(graphPath + e.name()) + .json(graphPath + "/"+ e.name()) .as(Encoders.bean(Result.class)) .selectExpr("id", "author as graph_authors"); Dataset supplements = spark.read() .schema(Encoders.bean(Relation.class).schema()) - .json(graphPath + "relation") + .json(graphPath + "/"+ "relation") .where("relclass IN('" + ModelConstants.IS_SUPPLEMENT_TO + "', '" + ModelConstants.IS_SUPPLEMENTED_BY + "')") .selectExpr("source as id", "target"); @@ -98,7 +98,7 @@ public class SparkPropagateOrcidAuthor extends SparkEnrichWithOrcidAuthors { .write() .mode(SaveMode.Overwrite) .option("compression", "gzip") - .parquet(targetPath + e.name() + "_unmatched"); + .parquet(targetPath + "/"+ e.name() + "_unmatched"); }); } diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/oozie_app/workflow.xml index 211ab0200..6feb22a20 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/oozie_app/workflow.xml @@ -112,10 +112,10 @@ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.shuffle.partitions=8000 - --graphPath${sourcePath}/ - --orcidPath${sourcePath}/ - --workingDir${workingDir}/ - --targetPath${outputPath}/ + --graphPath${sourcePath} + --orcidPath${sourcePath} + --workingDir${workingDir} + --targetPath${outputPath} --matchingSourcegraph From fbc19ce4a803e4c5351d8402f5dd16435835ade6 Mon Sep 17 00:00:00 2001 From: Miriam Baglioni Date: Thu, 7 Nov 2024 13:36:24 +0100 Subject: [PATCH 68/73] [orcidenrichment] fixing issue --- .../SparkPropagateOrcidAuthor.java | 192 +++++++++++------- 1 file changed, 115 insertions(+), 77 deletions(-) diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkPropagateOrcidAuthor.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkPropagateOrcidAuthor.java index 69aadd108..5c13cba3c 100644 --- 
a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkPropagateOrcidAuthor.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkPropagateOrcidAuthor.java @@ -1,5 +1,16 @@ package eu.dnetlib.dhp.orcidtoresultfromsemrel; +import java.util.List; +import java.util.Objects; +import java.util.Optional; +import java.util.stream.Collectors; + +import org.apache.spark.api.java.function.FilterFunction; +import org.apache.spark.api.java.function.MapFunction; +import org.apache.spark.sql.*; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.common.author.SparkEnrichWithOrcidAuthors; import eu.dnetlib.dhp.schema.common.ModelConstants; @@ -8,98 +19,125 @@ import eu.dnetlib.dhp.schema.oaf.Author; import eu.dnetlib.dhp.schema.oaf.Relation; import eu.dnetlib.dhp.schema.oaf.Result; import eu.dnetlib.dhp.utils.OrcidAuthor; -import org.apache.spark.api.java.function.FilterFunction; -import org.apache.spark.api.java.function.MapFunction; -import org.apache.spark.sql.*; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import scala.Tuple2; - -import java.util.List; -import java.util.Objects; -import java.util.Optional; -import java.util.stream.Collectors; - - public class SparkPropagateOrcidAuthor extends SparkEnrichWithOrcidAuthors { - private static final Logger log = LoggerFactory.getLogger(SparkPropagateOrcidAuthor.class); + private static final Logger log = LoggerFactory.getLogger(SparkPropagateOrcidAuthor.class); - public SparkPropagateOrcidAuthor(String propertyPath, String[] args, Logger log) { - super(propertyPath, args, log); - } + public SparkPropagateOrcidAuthor(String propertyPath, String[] args, Logger log) { + super(propertyPath, args, log); + } - public static void main(String[] args) throws Exception { + public static void main(String[] args) throws Exception { - // Create instance and run the Spark application - SparkPropagateOrcidAuthor app = new SparkPropagateOrcidAuthor("/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/input_orcidtoresult_parameters.json", args, log); - app.initialize().run(); + // Create instance and run the Spark application + SparkPropagateOrcidAuthor app = new SparkPropagateOrcidAuthor( + "/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/input_orcidtoresult_parameters.json", args, log); + app.initialize().run(); - } + } - private static OrcidAuthors getOrcidAuthorsList(List authors) { - OrcidAuthors oas = new OrcidAuthors(); - List tmp = authors.stream().map(SparkPropagateOrcidAuthor::getOrcidAuthor) - .filter(Objects::nonNull).collect(Collectors.toList()); - oas.setOrcidAuthorList(tmp); - return oas; - } + private static OrcidAuthors getOrcidAuthorsList(List authors) { + OrcidAuthors oas = new OrcidAuthors(); + List tmp = authors + .stream() + .map(SparkPropagateOrcidAuthor::getOrcidAuthor) + .filter(Objects::nonNull) + .collect(Collectors.toList()); + oas.setOrcidAuthorList(tmp); + return oas; + } - private static OrcidAuthor getOrcidAuthor(Author a){ - return Optional.ofNullable(getOrcid(a)) - .map(orcid -> new OrcidAuthor(orcid,a.getSurname(), a.getName(), a.getFullname(), null)) - .orElse(null); + private static OrcidAuthor getOrcidAuthor(Author a) { + return Optional + .ofNullable(getOrcid(a)) + .map(orcid -> new OrcidAuthor(orcid, a.getSurname(), a.getName(), a.getFullname(), null)) + .orElse(null); - } + } - private static String getOrcid(Author a){ - if 
(a.getPid().stream().anyMatch(p->p.getQualifier().getClassid().equalsIgnoreCase(ModelConstants.ORCID))) - return a.getPid().stream().filter(p->p.getQualifier().getClassid().equalsIgnoreCase(ModelConstants.ORCID)).findFirst().get().getValue(); - if (a.getPid().stream().anyMatch(p->p.getQualifier().getClassid().equalsIgnoreCase(ModelConstants.ORCID_PENDING))) - return a.getPid().stream().filter(p->p.getQualifier().getClassid().equalsIgnoreCase(ModelConstants.ORCID_PENDING)).findFirst().get().getValue(); - return null; + private static String getOrcid(Author a) { + if (a.getPid().stream().anyMatch(p -> p.getQualifier().getClassid().equalsIgnoreCase(ModelConstants.ORCID))) + return a + .getPid() + .stream() + .filter(p -> p.getQualifier().getClassid().equalsIgnoreCase(ModelConstants.ORCID)) + .findFirst() + .get() + .getValue(); + if (a + .getPid() + .stream() + .anyMatch(p -> p.getQualifier().getClassid().equalsIgnoreCase(ModelConstants.ORCID_PENDING))) + return a + .getPid() + .stream() + .filter(p -> p.getQualifier().getClassid().equalsIgnoreCase(ModelConstants.ORCID_PENDING)) + .findFirst() + .get() + .getValue(); + return null; - } + } - @Override - public void createTemporaryData(SparkSession spark, String graphPath, String orcidPath, String targetPath) { - ModelSupport.entityTypes - .keySet().stream().filter(ModelSupport::isResult) - .forEach(e -> { - Dataset orcidDnet = spark.read().schema(Encoders.bean(Result.class).schema()) - .json(graphPath + "/"+ e.name()) - .as(Encoders.bean(Result.class)) - .filter((FilterFunction) r -> r.getAuthor().stream() - .anyMatch(a -> a.getPid() - .stream() - .anyMatch(p -> p.getQualifier().getClassid().equalsIgnoreCase(ModelConstants.ORCID) || - p.getQualifier().getClassid().equalsIgnoreCase(ModelConstants.ORCID_PENDING)))) - .map((MapFunction>) r -> - new Tuple2<>(r.getId(), getOrcidAuthorsList(r.getAuthor())) - , Encoders.tuple(Encoders.STRING(), Encoders.bean(OrcidAuthors.class))) - .selectExpr("_1 as target", "_2 as orcid_authors"); + @Override + public void createTemporaryData(SparkSession spark, String graphPath, String orcidPath, String targetPath) { + ModelSupport.entityTypes + .keySet() + .stream() + .filter(ModelSupport::isResult) + .forEach(e -> { + Dataset orcidDnet = spark + .read() + .schema(Encoders.bean(Result.class).schema()) + .json(graphPath + "/" + e.name()) + .as(Encoders.bean(Result.class)) + .filter( + (FilterFunction) r -> r.getAuthor() != null && + r + .getAuthor() + .stream() + .anyMatch( + a -> a.getPid() != null && a + .getPid() + .stream() + .anyMatch( + p -> p.getQualifier().getClassid().equalsIgnoreCase(ModelConstants.ORCID) || + p + .getQualifier() + .getClassid() + .equalsIgnoreCase(ModelConstants.ORCID_PENDING)))) + .map( + (MapFunction>) r -> new Tuple2<>(r.getId(), + getOrcidAuthorsList(r.getAuthor())), + Encoders.tuple(Encoders.STRING(), Encoders.bean(OrcidAuthors.class))) + .selectExpr("_1 as target", "_2.orcidAuthorList as orcid_authors"); - Dataset result = spark.read().schema(Encoders.bean(Result.class).schema()) - .json(graphPath + "/"+ e.name()) - .as(Encoders.bean(Result.class)) - .selectExpr("id", "author as graph_authors"); + Dataset result = spark + .read() + .schema(Encoders.bean(Result.class).schema()) + .json(graphPath + "/" + e.name()) + .as(Encoders.bean(Result.class)) + .selectExpr("id", "author as graph_authors"); - Dataset supplements = spark.read() - .schema(Encoders.bean(Relation.class).schema()) - .json(graphPath + "/"+ "relation") - .where("relclass IN('" + ModelConstants.IS_SUPPLEMENT_TO + "', 
'" + - ModelConstants.IS_SUPPLEMENTED_BY + "')") - .selectExpr("source as id", "target"); + Dataset supplements = spark + .read() + .schema(Encoders.bean(Relation.class).schema()) + .json(graphPath + "/" + "relation") + .where( + "relclass IN('" + ModelConstants.IS_SUPPLEMENT_TO + "', '" + + ModelConstants.IS_SUPPLEMENTED_BY + "')") + .selectExpr("source as id", "target"); - result - .join(supplements, "id") - .join(orcidDnet, "target") - .drop("target") - .write() - .mode(SaveMode.Overwrite) - .option("compression", "gzip") - .parquet(targetPath + "/"+ e.name() + "_unmatched"); + result + .join(supplements, "id") + .join(orcidDnet, "target") + .drop("target") + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .parquet(targetPath + "/" + e.name() + "_unmatched"); - }); - } + }); + } } From e03e8a39c084b21bc6e4cb99b74a7a4e574c6c7c Mon Sep 17 00:00:00 2001 From: Giambattista Bloisi Date: Thu, 21 Nov 2024 16:17:08 +0100 Subject: [PATCH 69/73] [orcidenrichment] Do not match in case of ambiguity: two authors match and at least one of them has affiliation string --- .../dhp/utils/ORCIDAuthorEnricher.scala | 112 ++++++++++-------- 1 file changed, 65 insertions(+), 47 deletions(-) diff --git a/dhp-common/src/main/scala/eu/dnetlib/dhp/utils/ORCIDAuthorEnricher.scala b/dhp-common/src/main/scala/eu/dnetlib/dhp/utils/ORCIDAuthorEnricher.scala index ff9715c07..81d7bf81a 100644 --- a/dhp-common/src/main/scala/eu/dnetlib/dhp/utils/ORCIDAuthorEnricher.scala +++ b/dhp-common/src/main/scala/eu/dnetlib/dhp/utils/ORCIDAuthorEnricher.scala @@ -7,6 +7,7 @@ import eu.dnetlib.dhp.schema.sx.OafUtils import java.util import scala.beans.BeanProperty import scala.collection.JavaConverters._ +import scala.collection.mutable.ArrayBuffer import scala.util.control.Breaks.{break, breakable} case class OrcidAuthor( @@ -20,9 +21,9 @@ case class OrcidAuthor( } case class MatchedAuthors( - @BeanProperty var author: Author, - @BeanProperty var orcid: OrcidAuthor, - @BeanProperty var `type`: String + @BeanProperty var author: Author, + @BeanProperty var orcid: OrcidAuthor, + @BeanProperty var `type`: String ) case class MatchData( @@ -47,13 +48,14 @@ object ORCIDAuthorEnricher extends Serializable { id: String, graph_authors: java.util.List[Author], orcid_authors: java.util.List[OrcidAuthor], - classid:String, - provenance:String + classid: String, + provenance: String ): ORCIDAuthorEnricherResult = { // Author enriching strategy: // 1) create a copy of graph author list in unmatched_authors // 2) find best match in unmatched_authors, remove it from unmatched_authors and enrich it so - // that the enrichment is reflected in graph_authors (they share author instances) + // that the enrichment is reflected in graph_authors (they share author instances). 
+ // Do not match in case of ambiguity: two authors match and at least one of them has affiliation string // 3) repeat (2) till the end of the list and then with different matching algorithms that have decreasing // trust in their output // At the end unmatched_authors will contain authors not matched with any of the matching algos @@ -87,7 +89,19 @@ object ORCIDAuthorEnricher extends Serializable { (author, orcid) => AuthorMatchers .matchOrderedTokenAndAbbreviations(author.getFullname, orcid.givenName + " " + orcid.familyName), - "orderedTokens", + "orderedTokens-1", + classid, + provenance, + skipAmbiguities = true + ) ++ + // split author names in tokens, order the tokens, then check for matches of full tokens or abbreviations + extractAndEnrichMatches( + unmatched_authors, + orcid_authors, + (author, orcid) => + AuthorMatchers + .matchOrderedTokenAndAbbreviations(author.getFullname, orcid.givenName + " " + orcid.familyName), + "orderedTokens-2", classid, provenance ) ++ @@ -116,50 +130,54 @@ object ORCIDAuthorEnricher extends Serializable { } private def extractAndEnrichMatches( - graph_authors: java.util.List[Author], - orcid_authors: java.util.List[OrcidAuthor], - matchingFunc: (Author, OrcidAuthor) => Boolean, - matchName: String, - classid:String, - provenance : String - ) = { - val matched = scala.collection.mutable.ArrayBuffer.empty[MatchedAuthors] + unmatched_authors: java.util.List[Author], + orcid_authors: java.util.List[OrcidAuthor], + matchingFunc: (Author, OrcidAuthor) => Boolean, + matchName: String, + classid: String, + provenance: String, + skipAmbiguities: Boolean = false + ): ArrayBuffer[MatchedAuthors] = { + val matched = ArrayBuffer.empty[MatchedAuthors] - if (graph_authors != null && !graph_authors.isEmpty) { - val ait = graph_authors.iterator + if (unmatched_authors == null || unmatched_authors.isEmpty) { + return matched + } - while (ait.hasNext) { - val author = ait.next() - val oit = orcid_authors.iterator - - breakable { - while (oit.hasNext) { - val orcid = oit.next() - - if (matchingFunc(author, orcid)) { - ait.remove() - oit.remove() - matched += MatchedAuthors(author, orcid, matchName) - - if (author.getPid == null) { - author.setPid(new util.ArrayList[StructuredProperty]()) - } - - val orcidPID = OafUtils.createSP(orcid.orcid, classid, classid) - //val orcidPID = OafUtils.createSP(orcid.orcid, ModelConstants.ORCID, ModelConstants.ORCID) - orcidPID.setDataInfo(OafUtils.generateDataInfo()) - orcidPID.getDataInfo.setProvenanceaction( - //OafUtils.createQualifier("ORCID_ENRICHMENT", "ORCID_ENRICHMENT") - OafUtils.createQualifier(provenance, provenance) - ) - - author.getPid.add(orcidPID) - - break() - } - } + val oit = orcid_authors.iterator + while (oit.hasNext) { + val orcid = oit.next() + val candidates = unmatched_authors.asScala.foldLeft(ArrayBuffer[Author]())((res, author) => { + if (matchingFunc(author, orcid)) { + res += author } + + res + }) + + if ( + candidates.size == 1 || + (candidates.size > 1 && !skipAmbiguities && !candidates + .exists(a => a.getRawAffiliationString != null && !a.getRawAffiliationString.isEmpty)) + ) { + val author = candidates(0) + unmatched_authors.remove(author) + oit.remove() + matched += MatchedAuthors(author, orcid, matchName) + + if (author.getPid == null) { + author.setPid(new util.ArrayList[StructuredProperty]()) + } + + val orcidPID = OafUtils.createSP(orcid.orcid, classid, classid) + orcidPID.setDataInfo(OafUtils.generateDataInfo()) + orcidPID.getDataInfo.setProvenanceaction( + OafUtils.createQualifier(provenance, 
provenance) + ) + + author.getPid.add(orcidPID) } + } matched From 64f4d7fb71d7c8645841a396aed10dc6febdc453 Mon Sep 17 00:00:00 2001 From: Giambattista Bloisi Date: Thu, 21 Nov 2024 16:18:29 +0100 Subject: [PATCH 70/73] [orcidenrichment] When comparing authors manage the case of hyphenation and punctuations characters and normalizes utf strings --- .../eu/dnetlib/dhp/utils/AuthorMatchers.scala | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/dhp-common/src/main/scala/eu/dnetlib/dhp/utils/AuthorMatchers.scala b/dhp-common/src/main/scala/eu/dnetlib/dhp/utils/AuthorMatchers.scala index 5f842726f..a75fec592 100644 --- a/dhp-common/src/main/scala/eu/dnetlib/dhp/utils/AuthorMatchers.scala +++ b/dhp-common/src/main/scala/eu/dnetlib/dhp/utils/AuthorMatchers.scala @@ -1,11 +1,12 @@ package eu.dnetlib.dhp.utils +import java.text.Normalizer import java.util.Locale import java.util.regex.Pattern import scala.util.control.Breaks.{break, breakable} object AuthorMatchers { - val SPLIT_REGEX = Pattern.compile("[\\s,\\.]+") + val SPLIT_REGEX = Pattern.compile("[\\s\\p{Punct}\\p{Pd}]+") val WORD_DIFF = 2 @@ -24,9 +25,16 @@ object AuthorMatchers { } } + def normalize(s: String): Array[String] = { + SPLIT_REGEX + .split(Normalizer.normalize(s, Normalizer.Form.NFC).toLowerCase(Locale.ROOT)) + .filter(_.nonEmpty) + .sorted + } + def matchOrderedTokenAndAbbreviations(a1: String, a2: String): Boolean = { - val p1: Array[String] = SPLIT_REGEX.split(a1.trim.toLowerCase(Locale.ROOT)).filter(_.nonEmpty).sorted - val p2: Array[String] = SPLIT_REGEX.split(a2.trim.toLowerCase(Locale.ROOT)).filter(_.nonEmpty).sorted + val p1: Array[String] = normalize(a1) + val p2: Array[String] = normalize(a2) if (p1.length < 2 || p2.length < 2) return false if (Math.abs(p1.length - p2.length) > WORD_DIFF) return false // use alternative comparison algo From 6260526fa1a98a4b75c0b7ab573226238feae17c Mon Sep 17 00:00:00 2001 From: Giambattista Bloisi Date: Thu, 21 Nov 2024 16:21:03 +0100 Subject: [PATCH 71/73] [orcidenrichment] Fix imports and formatting --- .../dhp/common/enrichment/Constants.java | 3 ++- .../eu/dnetlib/dhp/PropagationConstant.java | 6 ++--- .../SparkResultToProjectThroughSemRelJob.java | 1 + ...kResultToCommunityFromOrganizationJob.java | 1 + .../SparkResultToCommunityFromProject.java | 1 + ...parkResultToCommunityThroughSemRelJob.java | 1 + .../SparkJobTest.java | 5 +++-- .../SparkEnrichGraphWithOrcidAuthors.scala | 22 +++++++++++-------- 8 files changed, 25 insertions(+), 15 deletions(-) diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/enrichment/Constants.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/enrichment/Constants.java index df433dddb..becadb82b 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/enrichment/Constants.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/enrichment/Constants.java @@ -1,5 +1,6 @@ + package eu.dnetlib.dhp.common.enrichment; public class Constants { - public static final String PROPAGATION_DATA_INFO_TYPE = "propagation"; + public static final String PROPAGATION_DATA_INFO_TYPE = "propagation"; } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java index 8dd6e38ba..c28610e1e 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java @@ -1,6 +1,8 @@ package 
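[Stepping back to the AuthorMatchers change in PATCH 70 above: NFC recomposes decomposed accents, so "é" typed as "e" plus a combining acute compares equal to the precomposed form, and splitting on \p{Punct} plus \p{Pd} makes hyphenated or punctuated name variants yield the same token sequence. A runnable sketch assuming only what the patch shows; the object name NormalizeSketch is illustrative.]

import java.text.Normalizer
import java.util.Locale
import java.util.regex.Pattern

object NormalizeSketch {

  // Same split as the patch: whitespace, ASCII punctuation, Unicode dashes.
  private val Split = Pattern.compile("[\\s\\p{Punct}\\p{Pd}]+")

  def tokens(s: String): Array[String] =
    Split
      .split(Normalizer.normalize(s, Normalizer.Form.NFC).toLowerCase(Locale.ROOT))
      .filter(_.nonEmpty)
      .sorted

  def main(args: Array[String]): Unit = {
    // Hyphenated, punctuated and differently ordered variants tokenize identically.
    println(tokens("Garcia-Lopez, J.").sameElements(tokens("j garcia lopez"))) // true
    // NFC: "e" + U+0301 (combining acute) equals the precomposed "é".
    println(tokens("Rene\u0301 Dubois").sameElements(tokens("René Dubois")))   // true
  }
}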
eu.dnetlib.dhp; +import static eu.dnetlib.dhp.common.enrichment.Constants.PROPAGATION_DATA_INFO_TYPE; + import java.util.ArrayList; import java.util.List; import java.util.Optional; @@ -21,8 +23,6 @@ import eu.dnetlib.dhp.schema.oaf.DataInfo; import eu.dnetlib.dhp.schema.oaf.Qualifier; import eu.dnetlib.dhp.schema.oaf.Relation; -import static eu.dnetlib.dhp.common.enrichment.Constants.PROPAGATION_DATA_INFO_TYPE; - public class PropagationConstant { private PropagationConstant() { @@ -48,7 +48,7 @@ public class PropagationConstant { public static final String INSTITUTIONAL_REPO_TYPE = "institutional"; - //public static final String PROPAGATION_DATA_INFO_TYPE = "propagation"; + // public static final String PROPAGATION_DATA_INFO_TYPE = "propagation"; public static final String TRUE = "true"; diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/SparkResultToProjectThroughSemRelJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/SparkResultToProjectThroughSemRelJob.java index a6466716a..a28aba5d3 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/SparkResultToProjectThroughSemRelJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/SparkResultToProjectThroughSemRelJob.java @@ -3,6 +3,7 @@ package eu.dnetlib.dhp.projecttoresult; import static eu.dnetlib.dhp.PropagationConstant.*; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; +import static eu.dnetlib.dhp.common.enrichment.Constants.PROPAGATION_DATA_INFO_TYPE; import java.util.ArrayList; import java.util.List; diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java index f84a10d1c..c51e1fa78 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java @@ -3,6 +3,7 @@ package eu.dnetlib.dhp.resulttocommunityfromorganization; import static eu.dnetlib.dhp.PropagationConstant.*; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; +import static eu.dnetlib.dhp.common.enrichment.Constants.PROPAGATION_DATA_INFO_TYPE; import java.util.ArrayList; import java.util.Arrays; diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromproject/SparkResultToCommunityFromProject.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromproject/SparkResultToCommunityFromProject.java index 7a6238940..916af964e 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromproject/SparkResultToCommunityFromProject.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromproject/SparkResultToCommunityFromProject.java @@ -5,6 +5,7 @@ import static eu.dnetlib.dhp.PropagationConstant.*; import static eu.dnetlib.dhp.PropagationConstant.PROPAGATION_RESULT_COMMUNITY_ORGANIZATION_CLASS_NAME; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; +import static 
eu.dnetlib.dhp.common.enrichment.Constants.PROPAGATION_DATA_INFO_TYPE; import java.io.Serializable; import java.util.ArrayList; diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java index 3cf2f73c3..b923c4c32 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java @@ -3,6 +3,7 @@ package eu.dnetlib.dhp.resulttocommunityfromsemrel; import static eu.dnetlib.dhp.PropagationConstant.*; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; +import static eu.dnetlib.dhp.common.enrichment.Constants.PROPAGATION_DATA_INFO_TYPE; import java.util.*; import java.util.stream.Collectors; diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/SparkJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/SparkJobTest.java index db917658a..10c817ae4 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/SparkJobTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/SparkJobTest.java @@ -25,6 +25,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.KeyValueSet; import eu.dnetlib.dhp.PropagationConstant; +import eu.dnetlib.dhp.common.enrichment.Constants; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.Relation; @@ -145,7 +146,7 @@ public class SparkJobTest { .foreach( r -> Assertions .assertEquals( - PropagationConstant.PROPAGATION_DATA_INFO_TYPE, r.getDataInfo().getInferenceprovenance())); + Constants.PROPAGATION_DATA_INFO_TYPE, r.getDataInfo().getInferenceprovenance())); result .foreach( r -> Assertions @@ -428,7 +429,7 @@ public class SparkJobTest { .foreach( r -> Assertions .assertEquals( - PropagationConstant.PROPAGATION_DATA_INFO_TYPE, r.getDataInfo().getInferenceprovenance())); + Constants.PROPAGATION_DATA_INFO_TYPE, r.getDataInfo().getInferenceprovenance())); project .foreach( r -> Assertions diff --git a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/enrich/orcid/SparkEnrichGraphWithOrcidAuthors.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/enrich/orcid/SparkEnrichGraphWithOrcidAuthors.scala index f10a9e56f..7478225b5 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/enrich/orcid/SparkEnrichGraphWithOrcidAuthors.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/enrich/orcid/SparkEnrichGraphWithOrcidAuthors.scala @@ -11,7 +11,12 @@ import scala.collection.JavaConverters._ class SparkEnrichGraphWithOrcidAuthors(propertyPath: String, args: Array[String], log: Logger) extends SparkEnrichWithOrcidAuthors(propertyPath, args, log: Logger) { - override def createTemporaryData(spark:SparkSession, graphPath: String, orcidPath: String, targetPath: String): Unit = { + override def createTemporaryData( + spark: SparkSession, + graphPath: String, + orcidPath: String, + targetPath: String + ): Unit = { val orcidAuthors = spark.read.load(s"$orcidPath/Authors").select("orcid", "familyName", "givenName", "creditName", 
"otherNames") @@ -82,14 +87,13 @@ class SparkEnrichGraphWithOrcidAuthors(propertyPath: String, args: Array[String] } } - object SparkEnrichGraphWithOrcidAuthors { +object SparkEnrichGraphWithOrcidAuthors { - val log: Logger = LoggerFactory.getLogger(SparkEnrichGraphWithOrcidAuthors.getClass) + val log: Logger = LoggerFactory.getLogger(SparkEnrichGraphWithOrcidAuthors.getClass) - def main(args: Array[String]): Unit = { - new SparkEnrichGraphWithOrcidAuthors("/eu/dnetlib/dhp/enrich/orcid/enrich_graph_orcid_parameters.json", args, log) - .initialize() - .run() - } + def main(args: Array[String]): Unit = { + new SparkEnrichGraphWithOrcidAuthors("/eu/dnetlib/dhp/enrich/orcid/enrich_graph_orcid_parameters.json", args, log) + .initialize() + .run() } - +} From d095b31ea8bff04d3e63d7dca534578f27e5250a Mon Sep 17 00:00:00 2001 From: Giambattista Bloisi Date: Thu, 21 Nov 2024 16:24:17 +0100 Subject: [PATCH 72/73] [orcidenrichment] Fix lambda to avoid requiring serialization on enclosing class --- .../common/author/SparkEnrichWithOrcidAuthors.scala | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/dhp-common/src/main/scala/eu/dnetlib/dhp/common/author/SparkEnrichWithOrcidAuthors.scala b/dhp-common/src/main/scala/eu/dnetlib/dhp/common/author/SparkEnrichWithOrcidAuthors.scala index 47d563649..2f224aa26 100644 --- a/dhp-common/src/main/scala/eu/dnetlib/dhp/common/author/SparkEnrichWithOrcidAuthors.scala +++ b/dhp-common/src/main/scala/eu/dnetlib/dhp/common/author/SparkEnrichWithOrcidAuthors.scala @@ -25,14 +25,16 @@ abstract class SparkEnrichWithOrcidAuthors(propertyPath: String, args: Array[Str log.info(s"targetPath is '$targetPath'") val workingDir = parser.get("workingDir") log.info(s"targetPath is '$workingDir'") - val classid = Option(parser.get("matchingSource")).map(_=>ModelConstants.ORCID_PENDING).getOrElse(ModelConstants.ORCID) + val classid = + Option(parser.get("matchingSource")).map(_ => ModelConstants.ORCID_PENDING).getOrElse(ModelConstants.ORCID) log.info(s"classid is '$classid'") - val provenance = Option(parser.get("matchingSource")).map(_=>PROPAGATION_DATA_INFO_TYPE).getOrElse("ORCID_ENRICHMENT") + val provenance = + Option(parser.get("matchingSource")).map(_ => PROPAGATION_DATA_INFO_TYPE).getOrElse("ORCID_ENRICHMENT") log.info(s"targetPath is '$workingDir'") createTemporaryData(spark, graphPath, orcidPath, workingDir) - analisys(workingDir,classid,provenance) + analisys(workingDir, classid, provenance) generateGraph(spark, graphPath, workingDir, targetPath) } @@ -75,13 +77,15 @@ abstract class SparkEnrichWithOrcidAuthors(propertyPath: String, args: Array[Str .filter(e => ModelSupport.isResult(e._1)) .foreach(e => { val resultType = e._1.name() + val c = classid + val p = provenance spark.read .parquet(s"$targetPath/${resultType}_unmatched") .where("size(graph_authors) > 0") .as[MatchData](Encoders.bean(classOf[MatchData])) .map(md => { - ORCIDAuthorEnricher.enrichOrcid(md.id, md.graph_authors, md.orcid_authors, classid, provenance) + ORCIDAuthorEnricher.enrichOrcid(md.id, md.graph_authors, md.orcid_authors, c, p) })(Encoders.bean(classOf[ORCIDAuthorEnricherResult])) .write .option("compression", "gzip") From 71fe0374dc8f399d7f5ba33fb198c30eaae8ddd8 Mon Sep 17 00:00:00 2001 From: Giambattista Bloisi Date: Tue, 17 Dec 2024 16:01:03 +0100 Subject: [PATCH 73/73] Revise propagation tests --- .../author/SparkEnrichWithOrcidAuthors.scala | 3 +- .../dhp/oa/dedup/DecisionTreeTest.java | 11 ++- .../dnetlib/dhp/oa/dedup/SparkDedupTest.java | 3 +- 
.../dhp/oa/dedup/jpath/JsonPathTest.java | 8 +- .../orcidtoresultfromsemrel/OrcidAuthors.java | 19 ++-- .../SparkPropagateOrcidAuthor.java | 1 + .../CountryPropagationJobTest.java | 3 +- .../DatasourceCountryPreparationTest.java | 3 +- .../OrcidPropagationJobTest.java | 90 +++++++----------- .../ResultToCommunityJobTest.java | 3 +- .../ResultToCommunityJobTest.java | 3 +- .../noupdate/dataset/dataset_10.json.gz | Bin 0 -> 6787 bytes .../noupdate/otherresearchproduct/empty.json | 0 .../sample/noupdate/publication/empty.json | 0 .../sample/noupdate/relation/empty.json | 0 .../sample/noupdate/software/empty.json | 0 .../sample/oneupdate/dataset/dataset_10.json | 10 ++ .../oneupdate/otherresearchproduct/empty.json | 0 .../sample/oneupdate/publication/empty.json | 0 .../sample/oneupdate/relation/relation.json | 1 + .../sample/oneupdate/software/empty.json | 0 21 files changed, 80 insertions(+), 78 deletions(-) create mode 100644 dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/sample/noupdate/dataset/dataset_10.json.gz create mode 100644 dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/sample/noupdate/otherresearchproduct/empty.json create mode 100644 dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/sample/noupdate/publication/empty.json create mode 100644 dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/sample/noupdate/relation/empty.json create mode 100644 dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/sample/noupdate/software/empty.json create mode 100644 dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/sample/oneupdate/dataset/dataset_10.json create mode 100644 dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/sample/oneupdate/otherresearchproduct/empty.json create mode 100644 dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/sample/oneupdate/publication/empty.json create mode 100644 dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/sample/oneupdate/relation/relation.json create mode 100644 dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/sample/oneupdate/software/empty.json diff --git a/dhp-common/src/main/scala/eu/dnetlib/dhp/common/author/SparkEnrichWithOrcidAuthors.scala b/dhp-common/src/main/scala/eu/dnetlib/dhp/common/author/SparkEnrichWithOrcidAuthors.scala index 2f224aa26..ad8ebd681 100644 --- a/dhp-common/src/main/scala/eu/dnetlib/dhp/common/author/SparkEnrichWithOrcidAuthors.scala +++ b/dhp-common/src/main/scala/eu/dnetlib/dhp/common/author/SparkEnrichWithOrcidAuthors.scala @@ -72,7 +72,7 @@ abstract class SparkEnrichWithOrcidAuthors(propertyPath: String, args: Array[Str def createTemporaryData(spark: SparkSession, graphPath: String, orcidPath: String, targetPath: String): Unit - private def analisys(targetPath: String, classid:String, provenance:String): Unit = { + private def analisys(targetPath: String, classid: String, provenance: String): Unit = { ModelSupport.entityTypes.asScala .filter(e => ModelSupport.isResult(e._1)) .foreach(e => { @@ -94,4 +94,3 @@ abstract class SparkEnrichWithOrcidAuthors(propertyPath: String, args: Array[Str }) } } - diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/DecisionTreeTest.java 
b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/DecisionTreeTest.java index 5094317cb..217840c86 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/DecisionTreeTest.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/DecisionTreeTest.java @@ -21,9 +21,13 @@ class DecisionTreeTest { void testJPath() throws IOException { DedupConfig conf = DedupConfig - .load(IOUtils.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/dedup/jpath/dedup_conf_organization.json"))); + .load( + IOUtils + .toString( + getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/dedup/jpath/dedup_conf_organization.json"))); - final String org = IOUtils.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/dedup/jpath/organization.json")); + final String org = IOUtils + .toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/dedup/jpath/organization.json")); Row row = SparkModel.apply(conf).rowFromJson(org); @@ -42,7 +46,8 @@ class DecisionTreeTest { .getResourceAsStream( "/eu/dnetlib/dhp/dedup/conf/org.curr.conf.json"))); - final String org = IOUtils.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/dedup/jpath/organization_example1.json")); + final String org = IOUtils + .toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/dedup/jpath/organization_example1.json")); Row row = SparkModel.apply(conf).rowFromJson(org); // to check that the same parsing returns the same row diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java index d2d5af501..a3004c25a 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java @@ -440,7 +440,8 @@ public class SparkDedupTest implements Serializable { .count(); final List merges = pubs - .filter("source == '50|doi_dedup___::d5021b53204e4fdeab6ff5d5bc468032'")// and relClass = '"+ModelConstants.MERGES+"'") + .filter("source == '50|doi_dedup___::d5021b53204e4fdeab6ff5d5bc468032'")// and relClass = + // '"+ModelConstants.MERGES+"'") .collectAsList(); assertEquals(4, merges.size()); Set dups = Sets diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/jpath/JsonPathTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/jpath/JsonPathTest.java index 0923f67fa..d54f608ac 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/jpath/JsonPathTest.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/jpath/JsonPathTest.java @@ -19,9 +19,13 @@ class JsonPathTest { void testJPath() throws IOException { DedupConfig conf = DedupConfig - .load(IOUtils.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/dedup/jpath/dedup_conf_organization.json"))); + .load( + IOUtils + .toString( + getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/dedup/jpath/dedup_conf_organization.json"))); - final String org = IOUtils.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/dedup/jpath/organization.json")); + final String org = IOUtils + .toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/dedup/jpath/organization.json")); Row row = SparkModel.apply(conf).rowFromJson(org); diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/OrcidAuthors.java 
b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/OrcidAuthors.java index 0c687b991..582823660 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/OrcidAuthors.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/OrcidAuthors.java @@ -1,18 +1,19 @@ -package eu.dnetlib.dhp.orcidtoresultfromsemrel; -import eu.dnetlib.dhp.utils.OrcidAuthor; +package eu.dnetlib.dhp.orcidtoresultfromsemrel; import java.io.Serializable; import java.util.List; +import eu.dnetlib.dhp.utils.OrcidAuthor; + public class OrcidAuthors implements Serializable { - List orcidAuthorList; + List orcidAuthorList; - public List getOrcidAuthorList() { - return orcidAuthorList; - } + public List getOrcidAuthorList() { + return orcidAuthorList; + } - public void setOrcidAuthorList(List orcidAuthorList) { - this.orcidAuthorList = orcidAuthorList; - } + public void setOrcidAuthorList(List orcidAuthorList) { + this.orcidAuthorList = orcidAuthorList; + } } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkPropagateOrcidAuthor.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkPropagateOrcidAuthor.java index 5c13cba3c..86a042b21 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkPropagateOrcidAuthor.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkPropagateOrcidAuthor.java @@ -1,3 +1,4 @@ + package eu.dnetlib.dhp.orcidtoresultfromsemrel; import java.util.List; diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/countrypropagation/CountryPropagationJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/countrypropagation/CountryPropagationJobTest.java index c4141b3e8..bb20291d1 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/countrypropagation/CountryPropagationJobTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/countrypropagation/CountryPropagationJobTest.java @@ -7,6 +7,7 @@ import java.nio.file.Path; import java.util.ArrayList; import java.util.List; +import com.fasterxml.jackson.databind.DeserializationFeature; import org.apache.commons.io.FileUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; @@ -33,7 +34,7 @@ public class CountryPropagationJobTest { private static final Logger log = LoggerFactory.getLogger(CountryPropagationJobTest.class); - private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper().configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); private static SparkSession spark; diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/countrypropagation/DatasourceCountryPreparationTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/countrypropagation/DatasourceCountryPreparationTest.java index d9b879de8..5067b1cf7 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/countrypropagation/DatasourceCountryPreparationTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/countrypropagation/DatasourceCountryPreparationTest.java @@ -5,6 +5,7 @@ import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; +import com.fasterxml.jackson.databind.DeserializationFeature; import org.apache.commons.io.FileUtils; import 
org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; @@ -19,7 +20,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; public class DatasourceCountryPreparationTest { - private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper().configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); private static SparkSession spark; diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/OrcidPropagationJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/OrcidPropagationJobTest.java index 40849132e..59a8f82f3 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/OrcidPropagationJobTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/OrcidPropagationJobTest.java @@ -71,23 +71,24 @@ public class OrcidPropagationJobTest { .getResource( "/eu/dnetlib/dhp/orcidtoresultfromsemrel/preparedInfo/mergedOrcidAssoc") .getPath(); - SparkOrcidToResultFromSemRelJob - .main( - new String[] { - "-isTest", Boolean.TRUE.toString(), - "-isSparkSessionManaged", Boolean.FALSE.toString(), - "-sourcePath", sourcePath, - "-hive_metastore_uris", "", - "-saveGraph", "true", - "-resultTableName", Dataset.class.getCanonicalName(), - "-outputPath", workingDir.toString() + "/dataset", - "-possibleUpdatesPath", possibleUpdatesPath - }); + SparkPropagateOrcidAuthor + .main( + new String[] { + "-graphPath", + getClass() + .getResource( + "/eu/dnetlib/dhp/orcidtoresultfromsemrel/sample/noupdate") + .getPath(), + "-targetPath", + workingDir.toString() + "/graph", + "-orcidPath", "", + "-workingDir", workingDir.toString() + }); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); JavaRDD tmp = sc - .textFile(workingDir.toString() + "/dataset") + .textFile(workingDir.toString() + "/graph/dataset") .map(item -> OBJECT_MAPPER.readValue(item, Dataset.class)); // tmp.map(s -> new Gson().toJson(s)).foreach(s -> System.out.println(s)); @@ -110,36 +111,24 @@ public class OrcidPropagationJobTest { @Test void oneUpdateTest() throws Exception { - SparkOrcidToResultFromSemRelJob - .main( - new String[] { - "-isTest", - Boolean.TRUE.toString(), - "-isSparkSessionManaged", - Boolean.FALSE.toString(), - "-sourcePath", - getClass() - .getResource("/eu/dnetlib/dhp/orcidtoresultfromsemrel/sample/oneupdate") - .getPath(), - "-hive_metastore_uris", - "", - "-saveGraph", - "true", - "-resultTableName", - "eu.dnetlib.dhp.schema.oaf.Dataset", - "-outputPath", - workingDir.toString() + "/dataset", - "-possibleUpdatesPath", - getClass() - .getResource( - "/eu/dnetlib/dhp/orcidtoresultfromsemrel/preparedInfo/mergedOrcidAssoc") - .getPath() - }); + SparkPropagateOrcidAuthor + .main( + new String[] { + "-graphPath", + getClass() + .getResource( + "/eu/dnetlib/dhp/orcidtoresultfromsemrel/sample/oneupdate") + .getPath(), + "-targetPath", + workingDir.toString() + "/graph", + "-orcidPath", "", + "-workingDir", workingDir.toString() + }); final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext()); JavaRDD tmp = sc - .textFile(workingDir.toString() + "/dataset") + .textFile(workingDir.toString() + "/graph/dataset") .map(item -> OBJECT_MAPPER.readValue(item, Dataset.class)); // tmp.map(s -> new Gson().toJson(s)).foreach(s -> System.out.println(s)); @@ -177,31 +166,18 @@ public class OrcidPropagationJobTest { @Test void twoUpdatesTest() throws Exception 
{ - SparkOrcidToResultFromSemRelJob + SparkPropagateOrcidAuthor .main( new String[] { - "-isTest", - Boolean.TRUE.toString(), - "-isSparkSessionManaged", - Boolean.FALSE.toString(), - "-sourcePath", + "-graphPath", getClass() .getResource( "/eu/dnetlib/dhp/orcidtoresultfromsemrel/sample/twoupdates") .getPath(), - "-hive_metastore_uris", - "", - "-saveGraph", - "true", - "-resultTableName", - "eu.dnetlib.dhp.schema.oaf.Dataset", - "-outputPath", + "-targetPath", workingDir.toString() + "/dataset", - "-possibleUpdatesPath", - getClass() - .getResource( - "/eu/dnetlib/dhp/orcidtoresultfromsemrel/preparedInfo/mergedOrcidAssoc") - .getPath() + "-orcidPath", "", + "-workingDir", workingDir.toString() }); final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext()); diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttocommunityfromorganization/ResultToCommunityJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttocommunityfromorganization/ResultToCommunityJobTest.java index 6440ded92..d3505a604 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttocommunityfromorganization/ResultToCommunityJobTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttocommunityfromorganization/ResultToCommunityJobTest.java @@ -7,6 +7,7 @@ import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; +import com.fasterxml.jackson.databind.DeserializationFeature; import org.apache.commons.io.FileUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; @@ -30,7 +31,7 @@ public class ResultToCommunityJobTest { private static final Logger log = LoggerFactory.getLogger(ResultToCommunityJobTest.class); - private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper().configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); private static SparkSession spark; diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/ResultToCommunityJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/ResultToCommunityJobTest.java index 2b52c91de..0a7eb414a 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/ResultToCommunityJobTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/ResultToCommunityJobTest.java @@ -10,6 +10,7 @@ import java.util.ArrayList; import java.util.List; import java.util.stream.Collectors; +import com.fasterxml.jackson.databind.DeserializationFeature; import org.apache.commons.io.FileUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; @@ -34,7 +35,7 @@ public class ResultToCommunityJobTest { private static final Logger log = LoggerFactory.getLogger(ResultToCommunityJobTest.class); - private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper().configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); private static SparkSession spark; diff --git a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/sample/noupdate/dataset/dataset_10.json.gz b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/sample/noupdate/dataset/dataset_10.json.gz new file mode 100644 index 
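[The test mappers in PATCH 73 above are now built with FAIL_ON_UNKNOWN_PROPERTIES disabled, so fixture JSON may carry properties the target bean does not declare, which keeps fixtures usable across schema changes. A minimal sketch of that behaviour; the bean name Probe and the mapper wiring are illustrative, only the configure() call mirrors the patch.]

import com.fasterxml.jackson.databind.{DeserializationFeature, ObjectMapper}
import scala.beans.BeanProperty

// Illustrative bean with a single declared property.
class Probe { @BeanProperty var id: String = _ }

object LenientMapperSketch {

  val mapper: ObjectMapper = new ObjectMapper()
    .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false)

  def main(args: Array[String]): Unit = {
    // "unknown" is not declared on Probe: the default mapper would throw
    // UnrecognizedPropertyException, the lenient one silently drops it.
    val p = mapper.readValue("""{"id":"x","unknown":1}""", classOf[Probe])
    println(p.getId) // prints: x
  }
}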
0000000000000000000000000000000000000000..778f722624bf37dbe53d470c6510cc464d1b68fb GIT binary patch literal 6787 zcmV-}8hqs+iwFo+t(jf`17u-zVRL14UokK)YIARH0PS5*kK8tvzt5*23K(DpsIHb& zrTTYssymrBY{#%Y17ybwgOXTP>_}pTk~*nm4Qy_E-w&`buooYE82FZJUGpjSlk9sW zRZ>+~|I2ib6q||TbXSq$9HlIuR&760ZD-pu z=80F7di%E9fp+s^1?plsPgs`IWYw9v{@8bXe>@ydoC$OqTk@U%h*(st-h4lQ<2deg z+8+ktoKkl%8HD4(#2<`ke=v;ba6G0F^9CSY8lS-f{@xMM8}(0rGM-GQju%CK zlM2Kn(eo5yVi7XXS#?Unyr`{ zD^ZenJjZj6oId<{<@TqJKlO$^XX5@_r!1XInr3q$6MDGDaO(Ga?oicWKR{`X#owVh zL3v$wKNW}?LfAqeD zKU?*2(JR99um^(>J_cFTm){l87NQ%mG&cr{f=&oqlqDUe7rO zG2TSsvGOyt$d^K@u}0@}9&@99)ax7uqk;QzkuJLA83#-mv}``z{$38(-GO}N==}P{ z?CfOEM9a@AOVp33Tb=R_l}B;lpCkCsJ(~D#|HvB*#~n03u%vInSwh`H*5rple0?Xs zlAHhC{7=lfQE?}@rAiOo|Mar%S? z(uVwqQk(h4T{fl2UdFW6Rr_%Tm2 zGm-$Vn1|rec1D#61&5N+9NZXq!~`T$kzi9gI}X-IX?T=I$Ag}8Y_Yw=;M)OhC%FCm z^x2+dU$0XO=+cv zyML%42(c8ILLLRUSN2c0Q^**3NydJcQQTY7G>TadUsy~)(~AAmzwW0SqW*U9Ib_mchCB~Pi$@rBWRSN>X& z$8>4->(5ZUQ=_o0Dkj_xb*;1)OEbHfW`^$NLcPO(39tFEm?3NUCMGa2!KpF3FWcm% z-6?#V3xo>W<`=@oEgHq+1PH|S30nh zTR1QqU}z)cw+}Gf*%Z1Ygog)`Mk=%bi;-(X3cc5<{`Z7Qeu0&6odR;NVTpsk5P%Yv zw0xmC^W=5s2_>MG!u*a49+c8Pd!g_4;4@xM3rT@j|y;>h-g^GL@VO~gBF|GAy z8CimTfOzm6n!WCH!8lpUbXaAMuTZK3p-AC%Tw=fOC`6CEWJEFy)@(kKB1IvJh+`j( z>MsOYB9~zsmjO+{8v)5`N{|9SMwqdo5Z0#9iI{=fQEg1Zg1MAN;#NThKP-!crics( zUG=;y1|8%%s0N+@YfMH2s4DB(4V-%n_4r55@W^wlPTLXcv0;i&*>G%_q6J|J+>L5; zn8JEw8>X;fihB%GOq|K+$}q)zJQ)lJ;edvN5I&kSCmgwhem`(WK2>MGuMJb&RK1;H zis4Jy^G>>X?6%&Mol5-I zpH;1IdQhcatqO!37Kr`&bIG~0p%EJz`E+NN`abX$99C_j*kc_HbIcQPzc5G6)o`(A zIvNGzx!D-aF7J$5V44KR8@MhkvS7l8w+?+WO-(DO=S+q?~?PAevjRLSNY46iw6yMl@sRl??2=@82Xd#_WYYmhPE zoA;^{IfV~EPue&rD%f;q^A1YoMoy9eT#*jG>2TU_7||fX{bOji7Vj~DHpJ}=&$7x} z52sP$Vmsb3?YM4YDqQs^oE@V%9O4 ziR>C+Et#T?CUJW*#SYB7WD0n7h3C&%20!Wm|5xSN;X4N9ao-E93TzCI@RI9~5hG<{ zNzHj&)!y|)321*+z1ZRB&{y_d!DAjsuIUdH-snLlR-4~TYDPrU^2l^_c+w%zHdV!| zLTa(5WelNqR#a3BtX-s(Bs67EPjM|WwkRZ?sD6sSfx+Q!osxG-YWq&*uGTRI5ssby z@m4jx-c1=`ZAjO$Abv+W{THzy{uz&D(X^xoi>)O+w1xEGTGGQOHn1$|;WNhnM$*Gz zeC4@qkM-yMLC7YK?>TeI`o2R)%ykFjXcU1OI@gmPZmixu(nEjHo4DScNDqUa=Z+tW z^iavuJn6wC%Q#4S=uZdzspmFIdiWc7AQzL{hJ_|*Y zP<+8sJa5abgR&F`3(qYm3}?GR;Ui+^ccNra37pp?Weci*MhuIrJbE6Q6+`jrV$wVM`O>3<(lQUzLVWFOwm3hQz zX>>gm-_H&Q_{YaFJqm*GC{ywM*QsJ-oNU<`5!c%x%DkV@KuD@~g0P=gdN4BvZrGDa z)u1t{EW*u4ed+Y(9mzQE@2Lb2mo-`G57Vvha3PRovA6nDj0-H76^Rn7RGv;P-+5N3 zd#?8zLb%d|$`Cu2=29n~4=I?Q>Dzp-?^Wgs)uIT_nabIYBIhvkCMpRGv1MU`KpOk)3C0Z74D=Y?7{5cY#&qi8A$1&QN*E{fu(P*UU^&CiqsaV3tn z9J&mOsbjOvgpUdJ*$*p>d~Hon0rU;_YfF2fsHzM=Pl`1G14y*zoswK?#!N_|^DquH zVAmVQR?51@X==DP%DT+b6%AqyZH7yD)CI$QC14+>KIlF*D4>ireWBQ^f<+pCskO14 z3Q({f-dCVaL~WdkntdqekeOkD$ih(9T)M8YDf93pMX*%QBuUu0kX=(te^1fGK=)>% zqS|hZv&flDI6NF1sQ34EN2&ig8U~8CKR#0MGn>`D%0%dP@FEfvjD(e9OsHW^<@MVk-i{c!9#;lOu-$Qeg8 zguk3o2p>k_ymJE;?WXFjq@sT&*aMSsgisnt>kJo=#D%z}2 zTP8D_iY~n17P5BJdHHKaC99S0Sl-yZ^2TN&DR2+eCi!SQ==qe&Z^5sp3leEpXyIrS z4yKY}X97k`Y>63-kB!i#U1^xQm*CNGi$V)fNFx>rX`NEDP@J8SCw!q2cbu?*;>{*! 
zvqz^l-fUvAd3bDwt~=z}b85pqG(l0Af!j}%!~5#Khb6uppGz;rm$ z9j2hq&HDteHX^4n(R+4^>Vw;ZS&n@Ioa4>=tP2o8AF0%A6m072AD}{ndJq6#BvQqP zf##t{=bMjUx|*CMS&?cUi*A=o15(bC?e>V`tAsR-6e9>mWMTTeCM(ca0{si9;)7`_ zUS7*@q~w;0CP#881lEs1W2#Jo$ND~~o`LNU=4vCfWUZAJNjASI&f1nE1Y;`EG(iw4 zWu`%lky2!tplC~QQdBX4pm7wXBpQVxQ5-_UFER9xu)Zgx`F8+rk33fwc^M^%2!2+A zp@)I;Ca2O|R75r~os1d=QYbeca=ac(HKE4{Xs^t3mP@wn35v|J6<8Y2i~1<-kt;NX zqvKk;#|%>XJ}#YU4M}#H=HNibQ>JgdQgk$-(@7=U+*4q+2~W`gG%w8TA)`hf8HuYD zTq;Lf$OCvS`WqFsy^#Hqd8@reyw-&+b6GSR z^;gw03Ik$M5n4>z!(zFV_vj46Y!_wp7hd8p%?3+#o|)R9s#4TM)yhBxBRcFqIbwSJGV~J@@A*Q ziIb{aH9F4X_Y6*EGq zw>}y%6^f=@%i2) z?IM{V)sHpFcrffJum@RDdU@3W3L^c$OZvc);)6^+p_=gc(}m6{vzNljG-z_Hl&q#* zZFUK0oFOAcaSv`zMrF(sNeUeEE2U^4(C%3Z56z3*Y|ZQ-_5~No=6$4biOF9?wo?0g zu7`!chDi94bZ1Fl052`J)N-=fEaBAx|XW^R4`D!EBNZr=+ZP4&FAP6 z8ZpI~h;(`uCJYd$y6!fElgjo(4POM@i#GTji!k6B%h35*SgMn}W&{NYioyeyTyW(MFq`}94x#wGi_LpY z@RDL7siCJS(o{;fiNfl%vMmV_>^)=B;!q_{^TZ*OyX_Kj$njCw zK4w(1#34%@vYCl~hm*u^WVpViWU*)x3p*@Y0|lz9JmP7QXqIIMFDn*|c7So!nPQ7d zv(5_lv)~WI^{d6AEfWWZO}U6RAF>=~b&E@rxO_>!Wh7=-S=hNC&o{qdpiRm12$MBY z(kIUZ(%I;h8Nl|;yPnTWvs`xhHkv}Uq$#O(iuFy63B^m$j4m|6DBI=*N<^73%)H6K z2U?=vo};JLDZz3kL!}Oz63nIqvnj!BO0dstg0=Zjeow?c%QO3w4af4#TER0Lwwh;V zJ+kGQS)SQF^2|KPaj)W;MWcz^haZQdfQ7@T&jQbN1`%VyfDI-?w{tzu?8fTtZFjuWmRnQAQL$P&#g(d?E)vsY?lRd4=e zrrwjyuEaieHpI|ioYVr*5+H28q_$vTXtEZBCP8>rW!bPmv?B_wHmB9*%u769A2M=6 z*92FJHuU`a!uIx|=hK*w!?@S7yKIwABqGaoUJ1-NVR9m{0|^01i~?X-4%0IPLIq;# z*>6}GEV)%^m8#HKrs6iR$V>vc4Yptbn-JlC<0klC=vbZFxQUIM*tp4O5I4c?v3AEz ztVgzS6B{?V$GC}i<;@<^#EJUUoeu{z@`i)JX90`GwC@Mr*y#^$y4mB#>aC2MOx(Mj z&mZ+hgV6)VO^iHE$4y3)gK-l)pg(-&`qM#w>N+-VV&f+Efd21rmZ)z>g5l$~#c3g7 ztlAw(dluP(VRCMZSaHnlm}nLTb;i|R@zFF6GhV>jh~oJKrsfJ^4^cKeStKHqnBLt2 z(I^n7@QS40Kq7r)(ZbQ}xKJQuWLYHWt}Gh%K)yxkwp550G(lLN6g(q;qcYsQlOc=o zV7`i2*IpF~#p8?W>|dgl+3-O5>&|a6SiEbcI%B`Y z+mD!3v%5a0tncGN#T~A%D`OwFYcsMob@kzXJXDRPRxIY&#aKB-Gh>+py5Z%ec8|Qm z`%^aW@kXEO8l6b!{*<{23@xNfUect1Z=OQI&4={p3n~L9g{6z!X*k^GP?zgkom#p` lE$U(EB9<=lsnSI-D%ftii1o;pF7oyd{|_dcr49N|0RY)$9x4C; literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/sample/noupdate/otherresearchproduct/empty.json b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/sample/noupdate/otherresearchproduct/empty.json new file mode 100644 index 000000000..e69de29bb diff --git a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/sample/noupdate/publication/empty.json b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/sample/noupdate/publication/empty.json new file mode 100644 index 000000000..e69de29bb diff --git a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/sample/noupdate/relation/empty.json b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/sample/noupdate/relation/empty.json new file mode 100644 index 000000000..e69de29bb diff --git a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/sample/noupdate/software/empty.json b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/sample/noupdate/software/empty.json new file mode 100644 index 000000000..e69de29bb diff --git a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/sample/oneupdate/dataset/dataset_10.json b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/sample/oneupdate/dataset/dataset_10.json new file 
mode 100644 index 000000000..215acd239 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/sample/oneupdate/dataset/dataset_10.json @@ -0,0 +1,10 @@ +{"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1585055868909,"id":"50|dedup_wf_001::36bcfaa1494c849547a346da688ade24","originalId":["od______3989::02dd5d2c222191b0b9bd4f33c8e96529"],"collectedfrom":[{"key":"opendoar____::3989","value":"Depósito Digital e-UCJC","dataInfo":null}],"pid":[{"value":"10.4185/RLCS-2018-1243","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"dateofcollection":"2020-03-03T13:05:26.091Z","dateoftransformation":"2020-03-03T13:06:53.161Z","extraInfo":[],"oaiprovenance":{"originDescription":{"harvestDate":"2020-03-03T13:05:26.091Z","altered":true,"baseURL":"http%3A%2F%2Frepositorio.ucjc.edu%2Foai%2Frequest","identifier":"oai:repositorio.ucjc.edu:20.500.12020/562","datestamp":"2018-01-23T15:06:07Z","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"author":[{"affiliation":[],"fullname":"Jung, Nicole","name":"Nicole","pid":[{"dataInfo":null,"qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":null,"schemename":null},"value":"0000-0001-9513-2468"}],"rank":1,"surname":"Jung"},{"affiliation":[],"fullname":"Gräßle, Simone","name":"Simone","pid":[],"rank":2,"surname":"Gräßle"}],"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":{"classid":"esl/spa","classname":"Spanish","schemeid":"dnet:languages","schemename":"dnet:languages"},"country":[],"subject":[{"value":"Ciencias de la 
Comunicación","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"yihadismo","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"viralidad","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"vídeo","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"propaganda","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"terrorismo","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"terrorism","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"jihadism","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"virality","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"video","qualifier":{"classid":"","cla
ssname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"5903 Ideologías Políticas","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"6310.13 Terrorismo","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"title":[{"value":"El individualismo como estrategia","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Individualism as an improvised strategy","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"relevantdate":[],"description":[{"value":"Se analiza como el Yihadismo distribuye su mensaje propaganístico en You Tube.\nUniversidad Camilo José Cela","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"dateofacceptance":{"value":"2018-01-01","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"publisher":null,"embargoenddate":null,"source":[],"fulltext":[],"format":[],"contributor":[{"value":"MediaLab - 
UCJC","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"resourcetype":null,"coverage":[],"bestaccessright":null,"context":[],"externalReference":[],"instance":[{"license":{"value":"http://creativecommons.org/licenses/by-nc-nd/4.0/","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"instancetype":{"classid":"0024","classname":"Film","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"opendoar____::3989","value":"Depósito Digital e-UCJC","dataInfo":null},"url":["http://hdl.handle.net/20.500.12020/562"],"distributionlocation":"","collectedfrom":{"key":"opendoar____::3989","value":"Depósito Digital e-UCJC","dataInfo":null},"dateofacceptance":{"value":"2018-01-01","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null}],"storagedate":null,"device":null,"size":null,"version":null,"lastmetadataupdate":null,"metadataversionnumber":null,"geolocation":[]} +{"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1585055868954,"id":"50|dedup_wf_001::95b033c0c3961f6a1cdcd41a99a9632e","originalId":["od______3989::05d8c751462f9bb8d2b06956dfbc5c7b"],"collectedfrom":[{"key":"opendoar____::3989","value":"Depósito Digital e-UCJC","dataInfo":null}],"pid":[],"dateofcollection":"2020-03-03T13:05:26.079Z","dateoftransformation":"2020-03-03T13:06:53.239Z","extraInfo":[],"oaiprovenance":{"originDescription":{"harvestDate":"2020-03-03T13:05:26.079Z","altered":true,"baseURL":"http%3A%2F%2Frepositorio.ucjc.edu%2Foai%2Frequest","identifier":"oai:repositorio.ucjc.edu:20.500.12020/227","datestamp":"2017-11-15T16:29:55Z","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"author":[ {"affiliation":[],"fullname":"Jung, Nicole","name":"Nicole","pid":[],"rank":1,"surname":"Jung"},{"affiliation":[],"fullname":"Kumar, Vajinder","name":"Vajinder","pid":[],"rank":1,"surname":"Kumar"},{"affiliation":[],"fullname":"Giri, Santosh Kumar","name":"Santosh Kumar","pid":[],"rank":2,"surname":"Giri"},{"affiliation":[],"fullname":"Venugopalan, Paloth","name":"Paloth","pid":[],"rank":3,"surname":"Venugopalan"},{"affiliation":[],"fullname":"Kartha, K. P. Ravindranathan","name":"K. P. 
Ravindranathan","pid":[],"rank":4,"surname":"Kartha"}],"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":{"classid":"esl/spa","classname":"Spanish","schemeid":"dnet:languages","schemename":"dnet:languages"},"country":[],"subject":[{"value":"Ciencias de la Actividad Física y del Deporte","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Eventos deportivos","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Retos","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Deporte","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"title":[{"value":"I Congreso Nuevos Retos en los Eventos Deportivos","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"relevantdate":[],"description":[{"value":"I Congreso Nuevos Retos en los Eventos Deportivos, celebrado en la Universidad Camilo José Cela.\n\n\n\nDerivado del proyecto de investigación \"La situación de los eventos deportivos en España a través de su estructuras y de sus herramientas\", tiene como finalidad conocer la situación de los eventos deportivos en España. 
Se realizó un estudio descriptivo holístico en el que se investigaron elementos culturales, el perfil de los recursos humanos, la estructura de los comités, entre otras.","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"dateofacceptance":{"value":"2015-06-20","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"publisher":null,"embargoenddate":null,"source":[],"fulltext":[],"format":[],"contributor":[],"resourcetype":null,"coverage":[],"bestaccessright":null,"context":[],"externalReference":[],"instance":[{"license":{"value":"http://creativecommons.org/licenses/by-nc-nd/4.0/","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"instancetype":{"classid":"0024","classname":"Film","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"opendoar____::3989","value":"Depósito Digital e-UCJC","dataInfo":null},"url":["http://hdl.handle.net/20.500.12020/227"],"distributionlocation":"","collectedfrom":{"key":"opendoar____::3989","value":"Depósito Digital e-UCJC","dataInfo":null},"dateofacceptance":{"value":"2015-06-20","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null}],"storagedate":null,"device":null,"size":null,"version":null,"lastmetadataupdate":null,"metadataversionnumber":null,"geolocation":[]}
+{"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1585055869097,"id":"50|od______3989::0f89464c4ac4c398fe0c71433b175a62","originalId":["od______3989::0f89464c4ac4c398fe0c71433b175a62"],"collectedfrom":[{"key":"opendoar____::3989","value":"Depósito Digital e-UCJC","dataInfo":null}],"pid":[],"dateofcollection":"2020-03-03T13:05:26.069Z","dateoftransformation":"2020-03-03T13:06:53.54Z","extraInfo":[],"oaiprovenance":{"originDescription":{"harvestDate":"2020-03-03T13:05:26.069Z","altered":true,"baseURL":"http%3A%2F%2Frepositorio.ucjc.edu%2Foai%2Frequest","identifier":"oai:repositorio.ucjc.edu:20.500.12020/802","datestamp":"2018-11-14T15:30:47Z","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"author":[{"fullname":"Universidad Camilo José Cela, Ciencias de la Actividad Física y del Deporte","name":"Ciencias La Actividad Física Y. Del Deporte","surname":"Universidad Camilo José Cela","rank":1,"pid":null,"affiliation":null}],"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":{"classid":"esl/spa","classname":"Spanish","schemeid":"dnet:languages","schemename":"dnet:languages"},"country":[],"subject":[{"value":"Ciencias de la Actividad Física y del Deporte","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Actividades deportivas","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Impacto físico","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Proyectos de investigación","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"2411.06 Fisiología del Ejercicio","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"title":[{"value":"Investigaciones en Ciencias de la Actividad Física y del Deporte CCAFD - UCJC","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"relevantdate":[],"description":[{"value":"Blanca Romero, investigadora y docente de Ciencias de la Actividad Física y del Deporte en la Universidad Camilo José Cela, describe dos de las líneas de investigación que se han desarrollado en el departamento CCAFD de UCJC: el impacto físico de las diferentes actividades deportivas y los métodos de recuperación más eficaces tras el ejercicio físico.\nBlanca también destaca la importancia en el ámbito de la salud los estudios en Ciencias de la Actividad Física y del Deporte.\nCiencias de la Actividad Física y del Deporte en la Universidad Camilo José Cela","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"dateofacceptance":{"value":"2016-03-21","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"publisher":null,"embargoenddate":null,"source":[],"fulltext":[],"format":[],"contributor":[{"value":"Universidad Camilo José Cela, Ciencias de la Actividad Física y del Deporte","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"resourcetype":null,"coverage":[],"bestaccessright":null,"context":[],"externalReference":[],"instance":[{"license":{"value":"http://creativecommons.org/licenses/by-nc-nd/4.0/","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"instancetype":{"classid":"0024","classname":"Film","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"opendoar____::3989","value":"Depósito Digital e-UCJC","dataInfo":null},"url":["http://hdl.handle.net/20.500.12020/802"],"distributionlocation":"","collectedfrom":{"key":"opendoar____::3989","value":"Depósito Digital e-UCJC","dataInfo":null},"dateofacceptance":{"value":"2016-03-21","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null}],"storagedate":null,"device":null,"size":null,"version":null,"lastmetadataupdate":null,"metadataversionnumber":null,"geolocation":[]}
+{"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1585055869215,"id":"50|od______3989::1729c3988199b95d1d566851af7d3c55","originalId":["od______3989::1729c3988199b95d1d566851af7d3c55"],"collectedfrom":[{"key":"opendoar____::3989","value":"Depósito Digital e-UCJC","dataInfo":null}],"pid":[],"dateofcollection":"2020-03-03T13:05:25.653Z","dateoftransformation":"2020-03-03T13:06:53.764Z","extraInfo":[],"oaiprovenance":{"originDescription":{"harvestDate":"2020-03-03T13:05:25.653Z","altered":true,"baseURL":"http%3A%2F%2Frepositorio.ucjc.edu%2Foai%2Frequest","identifier":"oai:repositorio.ucjc.edu:20.500.12020/801","datestamp":"2018-11-14T15:19:38Z","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"author":[{"fullname":"Universidad Camilo José Cela, Ciencias de la Actividad Física y del Deporte","name":"Ciencias La Actividad Física Y. Del Deporte","surname":"Universidad Camilo José Cela","rank":1,"pid":null,"affiliation":null}],"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":{"classid":"esl/spa","classname":"Spanish","schemeid":"dnet:languages","schemename":"dnet:languages"},"country":[],"subject":[{"value":"Ciencias de la Actividad Física y del Deporte","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Prevención","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Hidratación","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Deporte","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Deportistas","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Electrolitos","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Deportes de resistencia","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"2411.06 Fisiología del Ejercicio","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"title":[{"value":"La importancia de la hidratación en competiciones resistencia, por Juan del Coso","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"relevantdate":[],"description":[{"value":"Juan del Coso, investigador y profesor en el Grado en Ciencias de la Actividad Física y del Deporte UCJC, presenta el proyecto de investigación que ha realizado el departamento CCAFD de UCJC: la prevención de desequilibrios de agua y electrolitos que se dan en los deportes de resistencia para que los deportistas realicen su actividad de una manera más segura.\nMás información: http://www.ucjc.edu/2016/03/investiga...\nUniversidad Camilo José Cela, Ciencias de la Actividad Física y del Deporte","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"dateofacceptance":{"value":"2016-03-21","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"publisher":null,"embargoenddate":null,"source":[],"fulltext":[],"format":[],"contributor":[{"value":"Grado en Ciencias de la Actividad Física y del Deporte UCJC","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"resourcetype":null,"coverage":[],"bestaccessright":null,"context":[],"externalReference":[],"instance":[{"license":{"value":"http://creativecommons.org/licenses/by-nc-nd/4.0/","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"instancetype":{"classid":"0024","classname":"Film","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"opendoar____::3989","value":"Depósito Digital e-UCJC","dataInfo":null},"url":["http://hdl.handle.net/20.500.12020/801"],"distributionlocation":"","collectedfrom":{"key":"opendoar____::3989","value":"Depósito Digital e-UCJC","dataInfo":null},"dateofacceptance":{"value":"2016-03-21","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null}],"storagedate":null,"device":null,"size":null,"version":null,"lastmetadataupdate":null,"metadataversionnumber":null,"geolocation":[]}
+{"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1585055869487,"id":"50|od______3989::2e3f34ce90520fae350a7e1148d7dcea","originalId":["od______3989::2e3f34ce90520fae350a7e1148d7dcea"],"collectedfrom":[{"key":"opendoar____::3989","value":"Depósito Digital e-UCJC","dataInfo":null}],"pid":[],"dateofcollection":"2020-03-03T13:05:34.912Z","dateoftransformation":"2020-03-03T13:06:54.218Z","extraInfo":[],"oaiprovenance":{"originDescription":{"harvestDate":"2020-03-03T13:05:34.912Z","altered":true,"baseURL":"http%3A%2F%2Frepositorio.ucjc.edu%2Foai%2Frequest","identifier":"oai:repositorio.ucjc.edu:20.500.12020/800","datestamp":"2018-11-14T13:43:21Z","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"author":[{"fullname":"Universidad Camilo José Cela, Vicerrectorado de Innovación","name":"Vicerrectorado Innovación","surname":"Universidad Camilo José Cela","rank":1,"pid":null,"affiliation":null}],"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":{"classid":"esl/spa","classname":"Spanish","schemeid":"dnet:languages","schemename":"dnet:languages"},"country":[],"subject":[{"value":"Ciencias de la Actividad Física y del Deporte","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Deporte","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Cafeína","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Efectos","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Alto rendimiento","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Competiciones","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Beneficios","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"2411.06 Fisiología del Ejercicio","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"title":[{"value":"Investigación - Efectos positivos y negativos del uso de la cafeína en el deporte","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"relevantdate":[],"description":[{"value":"Proyecto de investigación que se están desarrollando en la Universidad Camilo José Cela (UCJC) sobre la cafeína y sus efectos cuando se realiza deporte. https://www.ucjc.edu/2016/08/la-bbc-s...\n\nJuan Del Coso, director del Laboratorio de Fisiología del Ejercicio de la Universidad Camilo José Cela, nos describe cómo está siendo la investigación que desarrolla con deportistas de alto rendimiento y el consumo de cafeína.\n\n\"Nuestra investigación se enmarca dentro del campo de la fisiología del ejercicio y el objetivo que tenemos es investigar los efectos que tiene la cafeína en los deportistas, principalmente saber si es una ayuda ergogénica en la mayor parte de los deportes, pero también conocer qué perjuicios existen para aquellos deportistas que la utilizan para incrementar el rendimiento deportivo\", explica Del Coso.\n\nY es que los datos son muy llamativos: tres de cada cuatro deportistas de alto rendimiento utilizan la cafeína antes de la competición y muchas veces se utiliza sin saber los perjuicios que puede tener para el deportista. \"Lo que estamos investigando es cómo una sustancia, que te puede hacer rendir mejor, cómo puede afectar negativamente cuando termina una competición\".\n\nPero esta investigación no sólo puede extraer conclusiones interesantes y útiles para los deportistas profesionales, \"investigando el deporte de alto rendimiento podemos conocer los beneficios y perjuicios de la cafeína que también afectarían al deportista amateur y hacer un deporte en general más seguro\".\n\nPara el desarrollo de este proyecto de investigación están colaborando el Laboratorio de Fisiología del Ejercicio de la Universidad Camilo José Cela y la Agencia para la Protección de la Salud en el Deporte.\nPara el desarrollo de este proyecto de investigación están colaborando el Laboratorio de Fisiología del Ejercicio de la Universidad Camilo José Cela y la Agencia para la Protección de la Salud en el Deporte.","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"dateofacceptance":{"value":"2017-10-06","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"publisher":null,"embargoenddate":null,"source":[],"fulltext":[],"format":[],"contributor":[{"value":"Universidad Camilo José Cela, Vicerrectorado de Innovación","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"resourcetype":null,"coverage":[],"bestaccessright":null,"context":[],"externalReference":[],"instance":[{"license":{"value":"http://creativecommons.org/licenses/by-nc-nd/4.0/","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"instancetype":{"classid":"0024","classname":"Film","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"opendoar____::3989","value":"Depósito Digital e-UCJC","dataInfo":null},"url":["http://hdl.handle.net/20.500.12020/800"],"distributionlocation":"","collectedfrom":{"key":"opendoar____::3989","value":"Depósito Digital e-UCJC","dataInfo":null},"dateofacceptance":{"value":"2017-10-06","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null}],"storagedate":null,"device":null,"size":null,"version":null,"lastmetadataupdate":null,"metadataversionnumber":null,"geolocation":[]}
+{"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1585055869498,"id":"50|od______3989::2f4f3c820c450bd08dac08d07cc82dcf","originalId":["od______3989::2f4f3c820c450bd08dac08d07cc82dcf"],"collectedfrom":[{"key":"opendoar____::3989","value":"Depósito Digital e-UCJC","dataInfo":null}],"pid":[],"dateofcollection":"2020-03-03T13:05:34.806Z","dateoftransformation":"2020-03-03T13:06:54.229Z","extraInfo":[],"oaiprovenance":{"originDescription":{"harvestDate":"2020-03-03T13:05:34.806Z","altered":true,"baseURL":"http%3A%2F%2Frepositorio.ucjc.edu%2Foai%2Frequest","identifier":"oai:repositorio.ucjc.edu:20.500.12020/797","datestamp":"2018-11-14T12:12:23Z","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"author":[{"fullname":"Cuesta Cano, Laura","name":"Laura","surname":"Cuesta Cano","rank":1,"pid":null,"affiliation":null}],"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":{"classid":"esl/spa","classname":"Spanish","schemeid":"dnet:languages","schemename":"dnet:languages"},"country":[],"subject":[{"value":"Ciencias de la Comunicación","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Marketing digital","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Medios sociales","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Marcas","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Empresas","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Emprendedores","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"II Liga de Debate CICAE - UCJC","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"6114.13 Marketing","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"title":[{"value":"Laura Cuesta - Liga de Debate CICAE - UCJC","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"relevantdate":[],"description":[{"value":"Laura Cuesta, profesora en la Universidad Camilo José Cela (UCJC) y Especialista en Marketing Digital y Medios Sociales para marcas y emprendedores. \n\nEn esta ocasión, Laura Cuesta, nos explica el significado de redes sociales y nos habla sobre el uso que le dan los usuarios.\n\nII Liga de Debate CICAE - UCJC http://ligacicae.ucjc.edu/\n\nLa Universidad Camilo José Cela (UCJC) y la Asociación de Colegios Privados e Independientes –Círculo de Calidad Educativa (CICAE) – organizan la segunda edición de esta gran liga de debate académico. El formato es al estilo inglés, similar al de la Oxford Union, donde algunos jóvenes se reunían a debatir sobre temas de la más candente actualidad y polemizaban con la intención de formarse en tan valoradas habilidades.\n\nLa Liga de Debate CICAE-UCJC es una ocasión única para los alumnos de ponerse en contacto con herramientas tan útiles como la oratoria y la comunicación efectiva. Pero, sobre todo, una oportunidad de fomentar el pensamiento crítico y la socialización entre alumnos con espíritu emprendedor. La Liga de Debate CICAE - Universidad Camilo José Cela se diferencia del resto de torneos en que potenciamos que sea eminentemente formativa. Alumnos y profesores reciben formación en comunicación, oratoria y debate por parte de alguno de los expertos en debate españoles más reconocidos en la escena internacional.\n\nDatos de la III Liga de Debate CICAE - UCJC http://ligacicae.ucjc.edu/","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"dateofacceptance":{"value":"2018-10-23","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"publisher":null,"embargoenddate":null,"source":[],"fulltext":[],"format":[],"contributor":[{"value":"Universidad Camilo José Cela, Vicerrectorado de Innovación","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"resourcetype":null,"coverage":[],"bestaccessright":null,"context":[],"externalReference":[],"instance":[{"license":{"value":"http://creativecommons.org/licenses/by-nc-nd/4.0/","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"instancetype":{"classid":"0024","classname":"Film","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"opendoar____::3989","value":"Depósito Digital e-UCJC","dataInfo":null},"url":["http://hdl.handle.net/20.500.12020/797"],"distributionlocation":"","collectedfrom":{"key":"opendoar____::3989","value":"Depósito Digital e-UCJC","dataInfo":null},"dateofacceptance":{"value":"2018-10-23","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null}],"storagedate":null,"device":null,"size":null,"version":null,"lastmetadataupdate":null,"metadataversionnumber":null,"geolocation":[]}
+{"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1585055870582,"id":"50|od______3989::752fd0b2bbac1ea1cc50e52fd46eb663","originalId":["od______3989::752fd0b2bbac1ea1cc50e52fd46eb663"],"collectedfrom":[{"key":"opendoar____::3989","value":"Depósito Digital e-UCJC","dataInfo":null}],"pid":[],"dateofcollection":"2020-03-03T13:05:34.561Z","dateoftransformation":"2020-03-03T13:06:55.716Z","extraInfo":[],"oaiprovenance":{"originDescription":{"harvestDate":"2020-03-03T13:05:34.561Z","altered":true,"baseURL":"http%3A%2F%2Frepositorio.ucjc.edu%2Foai%2Frequest","identifier":"oai:repositorio.ucjc.edu:20.500.12020/798","datestamp":"2018-11-14T12:57:26Z","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"author":[{"fullname":"Universidad Camilo José Cela, Vicerrectorado de Innovación","name":"Vicerrectorado Innovación","surname":"Universidad Camilo José Cela","rank":1,"pid":null,"affiliation":null}],"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":{"classid":"esl/spa","classname":"Spanish","schemeid":"dnet:languages","schemename":"dnet:languages"},"country":[],"subject":[{"value":"Psicología","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Alcohol","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Dependencia","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Proyectos de investigación","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Alcoholismo","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Realidad virtual","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Psicología conductual","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"title":[{"value":"Investigación - Intervención basada en realidad virtual sobre la dependencia del alcohol","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"relevantdate":[],"description":[{"value":"Proyecto de investigación que se están desarrollando en la Universidad Camilo José Cela (UCJC) con tecnología aplicada al abandono de la dependencia del alcohol. \nRosa Jurado, profesora e investigadora de UCJC https://www.ucjc.edu/, explica cómo se conjuga el alcoholismo con la realidad virtual. \"Mi investigación consiste en desarrollar una técnica de intervención o rehabilitación que tiene como propósito fomentar la capacidad de inhibición para que las personas que tienen dependencia del alcohol sean capaces de controlar su conducta de aproximación hacia aquello que tiene que ver con el consumo de alcohol\".\n\nEn esta investigación, liderada por la Universidad Camilo José Cela, están participando, el Instituto de Investigación Biomédica del Hospital 12 de Octubre y la Universidad Politécnica de Madrid, con el CeDint. \"En nuestro equipo de investigación pensamos que, además del deseo de consumo, existen una carencia clara de capacidad de control inhibitorio en este tipo de personas\", destaca Rosa Jurado.\n\nLa realidad virtual sirve, según explica Gabriel Rubio, jefe de Psiquiatría del Hospital 12 de Octubre, \"para dar un paso más, para ver cómo reacciona un sujeto en un ambiente que no es el real, pero se parece mucho al real\". El objetivo final de estas acciones es \"mejorar la capacidad del individuo para poner en marcha mecanismos de inhibición conductual, para que el sujeto sea capaz de decir que no\".\n\nY no acaba ahí el proceso. La fase final del tratamiento se enfoca en que el paciente \"sea capaz de reconstruir su vida\" a través de programas de entrenamiento de habilidades sociales, asertividad, familias... \"Juntando todas las áreas de intervención, la abstinencia se mantiene mucho más tiempo y las recaídas disminuyen\".","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"dateofacceptance":{"value":"2017-10-05","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"publisher":null,"embargoenddate":null,"source":[],"fulltext":[],"format":[],"contributor":[{"value":"Universidad Camilo José Cela, Vicerrectorado de Innovación","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"resourcetype":null,"coverage":[],"bestaccessright":null,"context":[],"externalReference":[],"instance":[{"license":{"value":"http://creativecommons.org/licenses/by-nc-nd/4.0/","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"instancetype":{"classid":"0024","classname":"Film","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"opendoar____::3989","value":"Depósito Digital e-UCJC","dataInfo":null},"url":["http://hdl.handle.net/20.500.12020/798"],"distributionlocation":"","collectedfrom":{"key":"opendoar____::3989","value":"Depósito Digital e-UCJC","dataInfo":null},"dateofacceptance":{"value":"2017-10-05","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null}],"storagedate":null,"device":null,"size":null,"version":null,"lastmetadataupdate":null,"metadataversionnumber":null,"geolocation":[]}
+{"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1585055870750,"id":"50|od______3989::7fcbe3a03280663cddebfd3cb9203177","originalId":["od______3989::7fcbe3a03280663cddebfd3cb9203177"],"collectedfrom":[{"key":"opendoar____::3989","value":"Depósito Digital e-UCJC","dataInfo":null}],"pid":[],"dateofcollection":"2020-03-03T13:05:25.652Z","dateoftransformation":"2020-03-03T13:06:55.95Z","extraInfo":[],"oaiprovenance":{"originDescription":{"harvestDate":"2020-03-03T13:05:25.652Z","altered":true,"baseURL":"http%3A%2F%2Frepositorio.ucjc.edu%2Foai%2Frequest","identifier":"oai:repositorio.ucjc.edu:20.500.12020/796","datestamp":"2018-11-05T13:36:54Z","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"author":[{"fullname":"Universidad Camilo José Cela, Vicerrectorado de Innovación","name":"Vicerrectorado Innovación","surname":"Universidad Camilo José Cela","rank":1,"pid":null,"affiliation":null}],"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":{"classid":"esl/spa","classname":"Spanish","schemeid":"dnet:languages","schemename":"dnet:languages"},"country":[],"subject":[{"value":"Ciencias de la Comunicación","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Redes sociales","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Comunicación","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Información","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Desinformación","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"III Liga de Debate CICAE","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Instantaneidad","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Información periodística","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Fake news","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"5910.02 Medios de Comunicación de Masas","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"title":[{"value":"Jorge Gallardo - Liga de Debate CICAE","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"relevantdate":[],"description":[{"value":"Jorge Gallardo, profesor de Derecho en la Universidad Camilo José Cela (UCJC), Doctor en Comunicación Audiovisual y MBA en Empresas Audiovisuales. Subdirector de Espejo Público en Antena 3 Noticias. \n\nEn esta ocasión, Jorge Gallardo habla sobre las redes sociales, cómo han transformado la manera en la que nos comunicamos y nos informamos. \n\nIII Liga de Debate CICAE - UCJC http://ligacicae.ucjc.edu/\n\nLa Universidad Camilo José Cela (UCJC) y la Asociación de Colegios Privados e Independientes –Círculo de Calidad Educativa (CICAE) – organizan la tercera edición de esta gran liga de debate académico. El formato es al estilo inglés, similar al de la Oxford Union, donde algunos jóvenes se reunían a debatir sobre temas de la más candente actualidad y polemizaban con la intención de formarse en tan valoradas habilidades.\n\nLa Liga de Debate CICAE-UCJC es una ocasión única para los alumnos de ponerse en contacto con herramientas tan útiles como la oratoria y la comunicación efectiva. Pero, sobre todo, una oportunidad de fomentar el pensamiento crítico y la socialización entre alumnos con espíritu emprendedor. La Liga de Debate CICAE - Universidad Camilo José Cela se diferencia del resto de torneos en que potenciamos que sea eminentemente formativa. Alumnos y profesores reciben formación en comunicación, oratoria y debate por parte de alguno de los expertos en debate españoles más reconocidos en la escena internacional.\n\nDatos de la III Liga de Debate CICAE - UCJC http://ligacicae.ucjc.edu/","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"dateofacceptance":{"value":"2018-10-23","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"publisher":null,"embargoenddate":null,"source":[],"fulltext":[],"format":[],"contributor":[],"resourcetype":null,"coverage":[],"bestaccessright":null,"context":[],"externalReference":[],"instance":[{"license":{"value":"http://creativecommons.org/licenses/by-nc-nd/4.0/","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"instancetype":{"classid":"0024","classname":"Film","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"opendoar____::3989","value":"Depósito Digital e-UCJC","dataInfo":null},"url":["http://hdl.handle.net/20.500.12020/796"],"distributionlocation":"","collectedfrom":{"key":"opendoar____::3989","value":"Depósito Digital e-UCJC","dataInfo":null},"dateofacceptance":{"value":"2018-10-23","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null}],"storagedate":null,"device":null,"size":null,"version":null,"lastmetadataupdate":null,"metadataversionnumber":null,"geolocation":[]}
+{"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1585055872001,"id":"50|od______3989::d791339867bec6d3eb2104deeb4e4961","originalId":["od______3989::d791339867bec6d3eb2104deeb4e4961"],"collectedfrom":[{"key":"opendoar____::3989","value":"Depósito Digital e-UCJC","dataInfo":null}],"pid":[],"dateofcollection":"2020-03-03T13:05:34.586Z","dateoftransformation":"2020-03-03T13:06:57.721Z","extraInfo":[],"oaiprovenance":{"originDescription":{"harvestDate":"2020-03-03T13:05:34.586Z","altered":true,"baseURL":"http%3A%2F%2Frepositorio.ucjc.edu%2Foai%2Frequest","identifier":"oai:repositorio.ucjc.edu:20.500.12020/645","datestamp":"2018-04-12T07:59:27Z","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"author":[{"fullname":"Universidad Camilo José Cela, Vicerrectorado de Innovación","name":"Vicerrectorado Innovación","surname":"Universidad Camilo José Cela","rank":1,"pid":null,"affiliation":null}],"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":{"classid":"esl/spa","classname":"Spanish","schemeid":"dnet:languages","schemename":"dnet:languages"},"country":[],"subject":[{"value":"Transferencia de Conocimiento e Innovación","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Innovación","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Investigación","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Ciencia","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Tecnología","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"title":[{"value":"UCJC Open Science Day 2018","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"relevantdate":[],"description":[{"value":"El UCJC Open Science Day tiene como objetivo mostrar las actividades de investigación llevadas a cabo por investigadores de la Universidad Camilo José Cela (UCJC) a todos los miembros de la UCJC.","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"dateofacceptance":{"value":"2018-04-10","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"publisher":null,"embargoenddate":null,"source":[],"fulltext":[],"format":[],"contributor":[],"resourcetype":null,"coverage":[],"bestaccessright":null,"context":[],"externalReference":[],"instance":[{"license":{"value":"http://creativecommons.org/licenses/by-nc-nd/4.0/","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"instancetype":{"classid":"0024","classname":"Film","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"opendoar____::3989","value":"Depósito Digital e-UCJC","dataInfo":null},"url":["http://hdl.handle.net/20.500.12020/645"],"distributionlocation":"","collectedfrom":{"key":"opendoar____::3989","value":"Depósito Digital e-UCJC","dataInfo":null},"dateofacceptance":{"value":"2018-04-10","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null}],"storagedate":null,"device":null,"size":null,"version":null,"lastmetadataupdate":null,"metadataversionnumber":null,"geolocation":[]}
+{"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1585055872022,"id":"50|od______3989::d90d3a1f64ad264b5ebed8a35b280343","originalId":["od______3989::d90d3a1f64ad264b5ebed8a35b280343"],"collectedfrom":[{"key":"opendoar____::3989","value":"Depósito Digital e-UCJC","dataInfo":null}],"pid":[],"dateofcollection":"2020-03-03T13:05:34.912Z","dateoftransformation":"2020-03-03T13:06:57.747Z","extraInfo":[],"oaiprovenance":{"originDescription":{"harvestDate":"2020-03-03T13:05:34.912Z","altered":true,"baseURL":"http%3A%2F%2Frepositorio.ucjc.edu%2Foai%2Frequest","identifier":"oai:repositorio.ucjc.edu:20.500.12020/795","datestamp":"2018-11-06T15:43:10Z","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"author":[{"fullname":"Universidad Camilo José Cela, Vicerrectorado de Innovación","name":"Vicerrectorado Innovación","surname":"Universidad Camilo José Cela","rank":1,"pid":null,"affiliation":null}],"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":{"classid":"esl/spa","classname":"Spanish","schemeid":"dnet:languages","schemename":"dnet:languages"},"country":[],"subject":[{"value":"Ciencias de la Comunicación","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Unión Europea","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Desinformación","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Política","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Periodismo","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Ciudadanos europeos","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Democracias","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Estado de derecho","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Derechos humanos","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"5910.02 Medios de Comunicación de Masas","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"title":[{"value":"Ramón Luis Varcárcel - La Unión Europea ante el reto de la desinformación","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"relevantdate":[],"description":[{"value":"Conferencia de Ramón Luis Valcárcel, vicepresidente del Parlamento Europeo, en el Campus de Almagro de la Universidad Camilo José Cela (UCJC) bajo el título \"La Unión Europea ante el reto de la desinformación\", en la que ha alertado sobre el alcance de la desinformación en las democracias y sobre cómo pone en peligro los derechos de los ciudadanos europeos. También ha participado el rector de la UCJC, Samuel Martín-Barbero.","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"dateofacceptance":{"value":"2018-10-11","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"publisher":null,"embargoenddate":null,"source":[],"fulltext":[],"format":[],"contributor":[],"resourcetype":null,"coverage":[],"bestaccessright":null,"context":[],"externalReference":[],"instance":[{"license":{"value":"http://creativecommons.org/licenses/by-nc-nd/4.0/","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"instancetype":{"classid":"0024","classname":"Film","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"opendoar____::3989","value":"Depósito Digital e-UCJC","dataInfo":null},"url":["http://hdl.handle.net/20.500.12020/795"],"distributionlocation":"","collectedfrom":{"key":"opendoar____::3989","value":"Depósito Digital e-UCJC","dataInfo":null},"dateofacceptance":{"value":"2018-10-11","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null}],"storagedate":null,"device":null,"size":null,"version":null,"lastmetadataupdate":null,"metadataversionnumber":null,"geolocation":[]}
diff --git a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/sample/oneupdate/otherresearchproduct/empty.json b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/sample/oneupdate/otherresearchproduct/empty.json
new file mode 100644
index 000000000..e69de29bb
diff --git a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/sample/oneupdate/publication/empty.json b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/sample/oneupdate/publication/empty.json
new file mode 100644
index 000000000..e69de29bb
diff --git a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/sample/oneupdate/relation/relation.json b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/sample/oneupdate/relation/relation.json
new file mode 100644
index 000000000..72c48e911
--- /dev/null
+++ b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/sample/oneupdate/relation/relation.json
@@ -0,0 +1 @@
+{"subRelType": "supplement", "relClass": "isSupplementedBy", "dataInfo": {"provenanceaction": {"classid": "iis", "classname": "Inferred by OpenAIRE", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "iis::document_affiliations", "invisible": false, "trust": "0.7731"}, "target": "50|dedup_wf_001::95b033c0c3961f6a1cdcd41a99a9632e", "lastupdatetimestamp": 1694431186898, "relType": "resultOrganization", "source": "50|dedup_wf_001::36bcfaa1494c849547a346da688ade24", "collectedfrom": [], "validated": false, "properties": []}
diff --git a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/sample/oneupdate/software/empty.json b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/sample/oneupdate/software/empty.json
new file mode 100644
index 000000000..e69de29bb