From 0bdfbb0a5794125b4bcbd761a0490d708997bbdd Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Tue, 19 May 2020 15:02:21 +0200 Subject: [PATCH 01/37] reintroduced RDD based relation cut off procedure --- .../dhp/oa/provision/PrepareRelationsJob.java | 48 ++++++++++++++----- .../oa/provision/model/SortableRelation.java | 6 +-- .../input_params_prepare_relations.json | 14 +++++- .../dhp/oa/provision/oozie_app/workflow.xml | 4 +- 4 files changed, 56 insertions(+), 16 deletions(-) diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJob.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJob.java index dbdc54fc04..32a20d62c4 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJob.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJob.java @@ -3,7 +3,9 @@ package eu.dnetlib.dhp.oa.provision; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; +import java.util.HashSet; import java.util.Optional; +import java.util.Set; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; @@ -15,17 +17,21 @@ import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SaveMode; import org.apache.spark.sql.SparkSession; +import org.apache.spark.sql.sources.In; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.base.Splitter; import com.google.common.collect.Iterables; import com.google.common.collect.Iterators; +import com.google.common.collect.Sets; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.common.HdfsSupport; import eu.dnetlib.dhp.oa.provision.model.SortableRelation; import eu.dnetlib.dhp.oa.provision.utils.RelationPartitioner; +import scala.Int; import scala.Tuple2; /** @@ -58,6 +64,8 @@ public class PrepareRelationsJob { public static final int MAX_RELS = 100; + public static final int DEFAULT_NUM_PARTITIONS = 3000; + public static void main(String[] args) throws Exception { String jsonConfiguration = IOUtils .toString( @@ -79,6 +87,24 @@ public class PrepareRelationsJob { String outputPath = parser.get("outputPath"); log.info("outputPath: {}", outputPath); + int relPartitions = Optional + .ofNullable(parser.get("relPartitions")) + .map(Integer::valueOf) + .orElse(DEFAULT_NUM_PARTITIONS); + log.info("relPartitions: {}", relPartitions); + + Set relationFilter = Optional + .ofNullable(parser.get("relationFilter")) + .map(s -> Sets.newHashSet(Splitter.on(",").split(s))) + .orElse(new HashSet<>()); + log.info("relationFilter: {}", relationFilter); + + int maxRelations = Optional + .ofNullable(parser.get("maxRelations")) + .map(Integer::valueOf) + .orElse(MAX_RELS); + log.info("maxRelations: {}", maxRelations); + SparkConf conf = new SparkConf(); runWithSparkSession( @@ -86,12 +112,13 @@ public class PrepareRelationsJob { isSparkSessionManaged, spark -> { removeOutputDir(spark, outputPath); - prepareRelationsFromPaths(spark, inputRelationsPath, outputPath); + prepareRelationsRDDFromPaths( + spark, inputRelationsPath, outputPath, relationFilter, relPartitions, maxRelations); }); } private static void prepareRelationsFromPaths( - SparkSession spark, String inputRelationsPath, String outputPath) { + SparkSession spark, String inputRelationsPath, String outputPath, Set relationFilter) { readPathRelation(spark, inputRelationsPath) .filter("dataInfo.deletedbyinference == false") .groupByKey( @@ -125,20 +152,19 @@ public class PrepareRelationsJob { // TODO work in progress private static void prepareRelationsRDDFromPaths( - SparkSession spark, String inputRelationsPath, String outputPath, int numPartitions) { - JavaRDD rels = readPathRelationRDD(spark, inputRelationsPath).repartition(numPartitions); + SparkSession spark, String inputRelationsPath, String outputPath, Set relationFilter, int relPartitions, + int maxRelations) { + JavaRDD rels = readPathRelationRDD(spark, inputRelationsPath).repartition(relPartitions); + // only consider those that are not virtually deleted RDD d = rels - .filter(rel -> !rel.getDataInfo().getDeletedbyinference()) // only - // consider - // those - // that are not virtually - // deleted + .filter(rel -> !rel.getDataInfo().getDeletedbyinference()) + .filter(rel -> !relationFilter.contains(rel.getRelClass())) .mapToPair( (PairFunction) rel -> new Tuple2<>(rel, rel)) .groupByKey(new RelationPartitioner(rels.getNumPartitions())) - .map(p -> Iterables.limit(p._2(), MAX_RELS)) - .flatMap(p -> p.iterator()) + .map(group -> Iterables.limit(group._2(), maxRelations)) + .flatMap(group -> group.iterator()) .rdd(); spark diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/SortableRelation.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/SortableRelation.java index 7c866001be..b6571b9bf9 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/SortableRelation.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/SortableRelation.java @@ -16,10 +16,10 @@ public class SortableRelation extends Relation implements Comparable, static { weights.put("outcome", 0); weights.put("supplement", 1); - weights.put("publicationDataset", 2); + weights.put("affiliation", 2); weights.put("relationship", 3); - weights.put("similarity", 4); - weights.put("affiliation", 5); + weights.put("publicationDataset", 4); + weights.put("similarity", 5); weights.put("provision", 6); weights.put("participation", 7); diff --git a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/input_params_prepare_relations.json b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/input_params_prepare_relations.json index bfb248d012..5ce37aa7be 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/input_params_prepare_relations.json +++ b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/input_params_prepare_relations.json @@ -21,6 +21,18 @@ "paramName": "rp", "paramLongName": "relPartitions", "paramDescription": "number or partitions for the relations Dataset", - "paramRequired": true + "paramRequired": false + }, + { + "paramName": "rf", + "paramLongName": "relationFilter", + "paramDescription": "filter applied reading relations (by relClass)", + "paramRequired": false + }, + { + "paramName": "mr", + "paramLongName": "maxRelations", + "paramDescription": "maximum number of relations applied reading relations (by relClass)", + "paramRequired": false } ] diff --git a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/oozie_app/workflow.xml index 298ac75892..a84db86880 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/oozie_app/workflow.xml @@ -102,7 +102,9 @@ --inputRelationsPath${inputGraphRootPath}/relation --outputPath${workingDir}/relation - --relPartitions3000 + --relPartitions${relPartitions} + --relationFilter${relationFilter} + --maxRelations${maxRelations} From d7d2a0637f262f3e51c519db78744ef8c713de0f Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 20 May 2020 14:55:38 +0200 Subject: [PATCH 02/37] added extra parameters to the provision indexing workflow --- .../input_params_prepare_relations.json | 2 +- .../dhp/oa/provision/oozie_app/workflow.xml | 25 +++++++++++++++++++ 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/input_params_prepare_relations.json b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/input_params_prepare_relations.json index 5ce37aa7be..71b2becc4d 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/input_params_prepare_relations.json +++ b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/input_params_prepare_relations.json @@ -32,7 +32,7 @@ { "paramName": "mr", "paramLongName": "maxRelations", - "paramDescription": "maximum number of relations applied reading relations (by relClass)", + "paramDescription": "maximum number of relations allowed for a each entity", "paramRequired": false } ] diff --git a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/oozie_app/workflow.xml index a84db86880..9600ccbaa4 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/oozie_app/workflow.xml @@ -9,6 +9,30 @@ isLookupUrl URL for the isLookup service + + relPartitions + number or partitions for the relations Dataset + + + relationFilter + filter applied reading relations (by relClass) + + + maxRelations + maximum number of relations allowed for a each entity + + + otherDsTypeId + mapping used to populate datasourceTypeUi field + + + format + metadata format name (DMF|TMF) + + + batchSize + number of records to be included in each indexing request + sparkDriverMemoryForJoining @@ -421,4 +445,5 @@ + \ No newline at end of file From dbfb9c19fe12181c61ef0435937b298a863ed555 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 21 May 2020 10:00:14 +0200 Subject: [PATCH 03/37] minor changes --- .../dhp/oa/provision/PrepareRelationsJob.java | 82 ++++++++++++------- .../dhp/oa/provision/oozie_app/workflow.xml | 9 +- 2 files changed, 59 insertions(+), 32 deletions(-) diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJob.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJob.java index 32a20d62c4..72d68a389e 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJob.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJob.java @@ -17,7 +17,6 @@ import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SaveMode; import org.apache.spark.sql.SparkSession; -import org.apache.spark.sql.sources.In; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -31,7 +30,6 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.common.HdfsSupport; import eu.dnetlib.dhp.oa.provision.model.SortableRelation; import eu.dnetlib.dhp.oa.provision.utils.RelationPartitioner; -import scala.Int; import scala.Tuple2; /** @@ -112,26 +110,74 @@ public class PrepareRelationsJob { isSparkSessionManaged, spark -> { removeOutputDir(spark, outputPath); - prepareRelationsRDDFromPaths( + prepareRelationsRDD( spark, inputRelationsPath, outputPath, relationFilter, relPartitions, maxRelations); }); } - private static void prepareRelationsFromPaths( - SparkSession spark, String inputRelationsPath, String outputPath, Set relationFilter) { + /** + * Dataset based implementation that prepares the graph relations by limiting the number of outgoing links and + * filtering the relation types according to the given criteria. + * + * @param spark the spark session + * @param inputRelationsPath source path for the graph relations + * @param outputPath output path for the processed relations + * @param relationFilter set of relation filters applied to the `relClass` field + * @param maxRelations maximum number of allowed outgoing edges + */ + private static void prepareRelations( + SparkSession spark, String inputRelationsPath, String outputPath, Set relationFilter, + int maxRelations) { readPathRelation(spark, inputRelationsPath) .filter("dataInfo.deletedbyinference == false") + .filter((FilterFunction) rel -> !relationFilter.contains(rel.getRelClass())) .groupByKey( (MapFunction) value -> value.getSource(), Encoders.STRING()) .flatMapGroups( (FlatMapGroupsFunction) (key, values) -> Iterators - .limit(values, MAX_RELS), + .limit(values, maxRelations), Encoders.bean(SortableRelation.class)) .write() .mode(SaveMode.Overwrite) .parquet(outputPath); } + /** + * RDD based implementation that prepares the graph relations by limiting the number of outgoing links and filtering + * the relation types according to the given criteria. Moreover, outgoing links kept within the given limit are + * prioritized according to the weights indicated in eu.dnetlib.dhp.oa.provision.model.SortableRelation. + * + * @param spark the spark session + * @param inputRelationsPath source path for the graph relations + * @param outputPath output path for the processed relations + * @param relationFilter set of relation filters applied to the `relClass` field + * @param maxRelations maximum number of allowed outgoing edges + */ + // TODO work in progress + private static void prepareRelationsRDD( + SparkSession spark, String inputRelationsPath, String outputPath, Set relationFilter, int relPartitions, + int maxRelations) { + JavaRDD rels = readPathRelationRDD(spark, inputRelationsPath).repartition(relPartitions); + RelationPartitioner partitioner = new RelationPartitioner(rels.getNumPartitions()); + + // only consider those that are not virtually deleted + RDD d = rels + .filter(rel -> !rel.getDataInfo().getDeletedbyinference()) + .filter(rel -> !relationFilter.contains(rel.getRelClass())) + .mapToPair( + (PairFunction) rel -> new Tuple2<>(rel, rel)) + .groupByKey(partitioner) + .map(group -> Iterables.limit(group._2(), maxRelations)) + .flatMap(group -> group.iterator()) + .rdd(); + + spark + .createDataset(d, Encoders.bean(SortableRelation.class)) + .write() + .mode(SaveMode.Overwrite) + .parquet(outputPath); + } + /** * Reads a Dataset of eu.dnetlib.dhp.oa.provision.model.SortableRelation objects from a newline delimited json text * file, @@ -150,30 +196,6 @@ public class PrepareRelationsJob { Encoders.bean(SortableRelation.class)); } - // TODO work in progress - private static void prepareRelationsRDDFromPaths( - SparkSession spark, String inputRelationsPath, String outputPath, Set relationFilter, int relPartitions, - int maxRelations) { - JavaRDD rels = readPathRelationRDD(spark, inputRelationsPath).repartition(relPartitions); - - // only consider those that are not virtually deleted - RDD d = rels - .filter(rel -> !rel.getDataInfo().getDeletedbyinference()) - .filter(rel -> !relationFilter.contains(rel.getRelClass())) - .mapToPair( - (PairFunction) rel -> new Tuple2<>(rel, rel)) - .groupByKey(new RelationPartitioner(rels.getNumPartitions())) - .map(group -> Iterables.limit(group._2(), maxRelations)) - .flatMap(group -> group.iterator()) - .rdd(); - - spark - .createDataset(d, Encoders.bean(SortableRelation.class)) - .write() - .mode(SaveMode.Overwrite) - .parquet(outputPath); - } - private static JavaRDD readPathRelationRDD( SparkSession spark, final String inputPath) { JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); diff --git a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/oozie_app/workflow.xml index 9600ccbaa4..fd8f5ba89a 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/oozie_app/workflow.xml @@ -80,6 +80,11 @@ spark2EventLogDir spark 2.* event log dir location + + sparkNetworkTimeout + configures spark.network.timeout + + @@ -357,7 +362,7 @@ --inputGraphRootPath${inputGraphRootPath} --inputRelatedEntitiesPath${workingDir}/join_partial --outputPath${workingDir}/join_entities - --numPartitions12000 + --numPartitions24000 @@ -381,7 +386,7 @@ --conf spark.sql.shuffle.partitions=7680 --conf spark.network.timeout=${sparkNetworkTimeout} - --inputPath ${workingDir}/join_entities + --inputPath${workingDir}/join_entities --outputPath${workingDir}/joined From 3e345174790135695590409284747c42d6c9cbf3 Mon Sep 17 00:00:00 2001 From: "michele.artini" Date: Thu, 21 May 2020 16:47:53 +0200 Subject: [PATCH 04/37] partial implementation of events with rels --- .../eu/dnetlib/dhp/broker/model/Topic.java | 69 ++++---- .../broker/oa/GenerateEventsApplication.java | 161 +++++++++++++++--- .../oa/matchers/EnrichMissingAbstract.java | 5 +- .../oa/matchers/EnrichMissingAuthorOrcid.java | 5 +- .../EnrichMissingDatasetIsReferencedBy.java | 38 +++++ .../EnrichMissingDatasetIsRelatedTo.java | 38 +++++ .../EnrichMissingDatasetIsSupplementedBy.java | 38 +++++ .../EnrichMissingDatasetIsSupplementedTo.java | 38 +++++ .../EnrichMissingDatasetReferences.java | 38 +++++ .../oa/matchers/EnrichMissingOpenAccess.java | 2 +- .../broker/oa/matchers/EnrichMissingPid.java | 2 +- .../oa/matchers/EnrichMissingProject.java | 21 ++- .../EnrichMissingPublicationDate.java | 2 +- ...nrichMissingPublicationIsReferencedBy.java | 42 +++++ .../EnrichMissingPublicationIsRelatedTo.java | 42 +++++ ...ichMissingPublicationIsSupplementedBy.java | 42 +++++ ...ichMissingPublicationIsSupplementedTo.java | 42 +++++ .../EnrichMissingPublicationReferences.java | 42 +++++ .../oa/matchers/EnrichMissingSoftware.java | 41 +++++ .../oa/matchers/EnrichMissingSubject.java | 2 +- .../oa/matchers/EnrichMoreOpenAccess.java | 2 +- .../dhp/broker/oa/matchers/EnrichMorePid.java | 2 +- .../broker/oa/matchers/EnrichMoreProject.java | 39 +++++ .../oa/matchers/EnrichMoreSoftware.java | 41 +++++ .../broker/oa/matchers/EnrichMoreSubject.java | 2 +- .../dhp/broker/oa/matchers/UpdateMatcher.java | 16 +- .../dhp/broker/oa/util/BrokerConstants.java | 2 + 27 files changed, 729 insertions(+), 85 deletions(-) create mode 100644 dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingDatasetIsReferencedBy.java create mode 100644 dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingDatasetIsRelatedTo.java create mode 100644 dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingDatasetIsSupplementedBy.java create mode 100644 dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingDatasetIsSupplementedTo.java create mode 100644 dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingDatasetReferences.java create mode 100644 dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPublicationIsReferencedBy.java create mode 100644 dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPublicationIsRelatedTo.java create mode 100644 dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPublicationIsSupplementedBy.java create mode 100644 dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPublicationIsSupplementedTo.java create mode 100644 dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPublicationReferences.java create mode 100644 dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingSoftware.java create mode 100644 dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMoreProject.java create mode 100644 dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMoreSoftware.java diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/model/Topic.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/model/Topic.java index 98088dd0aa..0716bd98de 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/model/Topic.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/model/Topic.java @@ -4,48 +4,45 @@ package eu.dnetlib.dhp.broker.model; public enum Topic { // ENRICHMENT MISSING - ENRICH_MISSING_OA_VERSION("ENRICH/MISSING/OPENACCESS_VERSION"), - ENRICH_MISSING_ABSTRACT("ENRICH/MISSING/ABSTRACT"), - ENRICH_MISSING_PUBLICATION_DATE("ENRICH/MISSING/PUBLICATION_DATE"), - ENRICH_MISSING_PID("ENRICH/MISSING/PID"), - ENRICH_MISSING_PROJECT("ENRICH/MISSING/PROJECT"), - ENRICH_MISSING_SOFTWARE("ENRICH/MISSING/SOFTWARE"), - ENRICH_MISSING_SUBJECT_MESHEUROPMC("ENRICH/MISSING/SUBJECT/MESHEUROPMC"), - ENRICH_MISSING_SUBJECT_ARXIV("ENRICH/MISSING/SUBJECT/ARXIV"), - ENRICH_MISSING_SUBJECT_JEL("ENRICH/MISSING/SUBJECT/JEL"), - ENRICH_MISSING_SUBJECT_DDC("ENRICH/MISSING/SUBJECT/DDC"), - ENRICH_MISSING_SUBJECT_ACM("ENRICH/MISSING/SUBJECT/ACM"), - ENRICH_MISSING_SUBJECT_RVK("ENRICH/MISSING/SUBJECT/RVK"), - ENRICH_MISSING_AUTHOR_ORCID("ENRICH/MISSING/AUTHOR/ORCID"), + ENRICH_MISSING_OA_VERSION("ENRICH/MISSING/OPENACCESS_VERSION"), ENRICH_MISSING_ABSTRACT( + "ENRICH/MISSING/ABSTRACT"), ENRICH_MISSING_PUBLICATION_DATE( + "ENRICH/MISSING/PUBLICATION_DATE"), ENRICH_MISSING_PID( + "ENRICH/MISSING/PID"), ENRICH_MISSING_PROJECT("ENRICH/MISSING/PROJECT"), ENRICH_MISSING_SOFTWARE( + "ENRICH/MISSING/SOFTWARE"), ENRICH_MISSING_SUBJECT_MESHEUROPMC( + "ENRICH/MISSING/SUBJECT/MESHEUROPMC"), ENRICH_MISSING_SUBJECT_ARXIV( + "ENRICH/MISSING/SUBJECT/ARXIV"), ENRICH_MISSING_SUBJECT_JEL( + "ENRICH/MISSING/SUBJECT/JEL"), ENRICH_MISSING_SUBJECT_DDC( + "ENRICH/MISSING/SUBJECT/DDC"), ENRICH_MISSING_SUBJECT_ACM( + "ENRICH/MISSING/SUBJECT/ACM"), ENRICH_MISSING_SUBJECT_RVK( + "ENRICH/MISSING/SUBJECT/RVK"), ENRICH_MISSING_AUTHOR_ORCID( + "ENRICH/MISSING/AUTHOR/ORCID"), // ENRICHMENT MORE - ENRICH_MORE_PID("ENRICH/MORE/PID"), - ENRICH_MORE_OA_VERSION("ENRICH/MORE/OPENACCESS_VERSION"), - ENRICH_MORE_ABSTRACT("ENRICH/MORE/ABSTRACT"), - ENRICH_MORE_PUBLICATION_DATE("ENRICH/MORE/PUBLICATION_DATE"), - ENRICH_MORE_PROJECT("ENRICH/MORE/PROJECT"), - ENRICH_MORE_SUBJECT_MESHEUROPMC("ENRICH/MORE/SUBJECT/MESHEUROPMC"), - ENRICH_MORE_SUBJECT_ARXIV("ENRICH/MORE/SUBJECT/ARXIV"), - ENRICH_MORE_SUBJECT_JEL("ENRICH/MORE/SUBJECT/JEL"), - ENRICH_MORE_SUBJECT_DDC("ENRICH/MORE/SUBJECT/DDC"), - ENRICH_MORE_SUBJECT_ACM("ENRICH/MORE/SUBJECT/ACM"), - ENRICH_MORE_SUBJECT_RVK("ENRICH/MORE/SUBJECT/RVK"), + ENRICH_MORE_PID("ENRICH/MORE/PID"), ENRICH_MORE_OA_VERSION("ENRICH/MORE/OPENACCESS_VERSION"), ENRICH_MORE_ABSTRACT( + "ENRICH/MORE/ABSTRACT"), ENRICH_MORE_PUBLICATION_DATE("ENRICH/MORE/PUBLICATION_DATE"), ENRICH_MORE_PROJECT( + "ENRICH/MORE/PROJECT"), ENRICH_MORE_SOFTWARE("ENRICH/MORE/SOFTWARE"), ENRICH_MORE_SUBJECT_MESHEUROPMC( + "ENRICH/MORE/SUBJECT/MESHEUROPMC"), ENRICH_MORE_SUBJECT_ARXIV( + "ENRICH/MORE/SUBJECT/ARXIV"), ENRICH_MORE_SUBJECT_JEL( + "ENRICH/MORE/SUBJECT/JEL"), ENRICH_MORE_SUBJECT_DDC( + "ENRICH/MORE/SUBJECT/DDC"), ENRICH_MORE_SUBJECT_ACM( + "ENRICH/MORE/SUBJECT/ACM"), ENRICH_MORE_SUBJECT_RVK("ENRICH/MORE/SUBJECT/RVK"), // ADDITION ADD_BY_PROJECT("ADD/BY_PROJECT"), // OTHER RELS - ENRICH_MISSING_PUBLICATION_IS_RELATED_TO("ENRICH/MISSING/PUBLICATION/IS_RELATED_TO"), - ENRICH_MISSING_PUBLICATION_REFERENCES("ENRICH/MISSING/PUBLICATION/REFERENCES"), - ENRICH_MISSING_PUBLICATION_IS_REFERENCED_BY("ENRICH/MISSING/PUBLICATION/IS_REFERENCED_BY"), - ENRICH_MISSING_PUBLICATION_IS_SUPPLEMENTED_TO("ENRICH/MISSING/PUBLICATION/IS_SUPPLEMENTED_TO"), - ENRICH_MISSING_PUBLICATION_IS_SUPPLEMENTED_BY("ENRICH/MISSING/PUBLICATION/IS_SUPPLEMENTED_BY"), + ENRICH_MISSING_PUBLICATION_IS_RELATED_TO( + "ENRICH/MISSING/PUBLICATION/IS_RELATED_TO"), ENRICH_MISSING_PUBLICATION_REFERENCES( + "ENRICH/MISSING/PUBLICATION/REFERENCES"), ENRICH_MISSING_PUBLICATION_IS_REFERENCED_BY( + "ENRICH/MISSING/PUBLICATION/IS_REFERENCED_BY"), ENRICH_MISSING_PUBLICATION_IS_SUPPLEMENTED_TO( + "ENRICH/MISSING/PUBLICATION/IS_SUPPLEMENTED_TO"), ENRICH_MISSING_PUBLICATION_IS_SUPPLEMENTED_BY( + "ENRICH/MISSING/PUBLICATION/IS_SUPPLEMENTED_BY"), - ENRICH_MISSING_DATASET_IS_RELATED_TO("ENRICH/MISSING/DATASET/IS_RELATED_TO"), - ENRICH_MISSING_DATASET_REFERENCES("ENRICH/MISSING/DATASET/REFERENCES"), - ENRICH_MISSING_DATASET_IS_REFERENCED_BY("ENRICH/MISSING/DATASET/IS_REFERENCED_BY"), - ENRICH_MISSING_DATASET_IS_SUPPLEMENTED_TO("ENRICH/MISSING/DATASET/IS_SUPPLEMENTED_TO"), - ENRICH_MISSING_DATASET_IS_SUPPLEMENTED_BY("ENRICH/MISSING/DATASET/IS_SUPPLEMENTED_BY"); + ENRICH_MISSING_DATASET_IS_RELATED_TO("ENRICH/MISSING/DATASET/IS_RELATED_TO"), ENRICH_MISSING_DATASET_REFERENCES( + "ENRICH/MISSING/DATASET/REFERENCES"), ENRICH_MISSING_DATASET_IS_REFERENCED_BY( + "ENRICH/MISSING/DATASET/IS_REFERENCED_BY"), ENRICH_MISSING_DATASET_IS_SUPPLEMENTED_TO( + "ENRICH/MISSING/DATASET/IS_SUPPLEMENTED_TO"), ENRICH_MISSING_DATASET_IS_SUPPLEMENTED_BY( + "ENRICH/MISSING/DATASET/IS_SUPPLEMENTED_BY"),; Topic(final String path) { this.path = path; @@ -59,7 +56,9 @@ public enum Topic { public static Topic fromPath(final String path) { for (final Topic t : Topic.values()) { - if (t.getPath().equals(path)) { return t; } + if (t.getPath().equals(path)) { + return t; + } } return null; } diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/GenerateEventsApplication.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/GenerateEventsApplication.java index 5fdd109252..fa425a1819 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/GenerateEventsApplication.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/GenerateEventsApplication.java @@ -4,11 +4,14 @@ package eu.dnetlib.dhp.broker.oa; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.util.ArrayList; +import java.util.Collection; import java.util.List; +import java.util.Map; import java.util.Optional; import java.util.stream.Collectors; import org.apache.commons.io.IOUtils; +import org.apache.commons.lang3.tuple.Pair; import org.apache.hadoop.io.compress.GzipCodec; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; @@ -29,18 +32,33 @@ import eu.dnetlib.dhp.broker.model.Event; import eu.dnetlib.dhp.broker.model.EventFactory; import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingAbstract; import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingAuthorOrcid; +import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingDatasetIsReferencedBy; +import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingDatasetIsRelatedTo; +import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingDatasetIsSupplementedBy; +import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingDatasetIsSupplementedTo; +import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingDatasetReferences; import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingOpenAccess; import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingPid; import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingProject; import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingPublicationDate; +import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingPublicationIsReferencedBy; +import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingPublicationIsRelatedTo; +import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingPublicationIsSupplementedBy; +import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingPublicationIsSupplementedTo; +import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingPublicationReferences; +import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingSoftware; import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingSubject; import eu.dnetlib.dhp.broker.oa.matchers.EnrichMoreOpenAccess; import eu.dnetlib.dhp.broker.oa.matchers.EnrichMorePid; +import eu.dnetlib.dhp.broker.oa.matchers.EnrichMoreProject; +import eu.dnetlib.dhp.broker.oa.matchers.EnrichMoreSoftware; import eu.dnetlib.dhp.broker.oa.matchers.EnrichMoreSubject; import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher; +import eu.dnetlib.dhp.broker.oa.util.BrokerConstants; import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; import eu.dnetlib.dhp.common.HdfsSupport; import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct; +import eu.dnetlib.dhp.schema.oaf.Project; import eu.dnetlib.dhp.schema.oaf.Publication; import eu.dnetlib.dhp.schema.oaf.Relation; import eu.dnetlib.dhp.schema.oaf.Result; @@ -50,24 +68,44 @@ public class GenerateEventsApplication { private static final Logger log = LoggerFactory.getLogger(GenerateEventsApplication.class); - private static final UpdateMatcher enrichMissingAbstract = new EnrichMissingAbstract(); - private static final UpdateMatcher enrichMissingAuthorOrcid = new EnrichMissingAuthorOrcid(); - private static final UpdateMatcher enrichMissingOpenAccess = new EnrichMissingOpenAccess(); - private static final UpdateMatcher enrichMissingPid = new EnrichMissingPid(); - private static final UpdateMatcher enrichMissingProject = new EnrichMissingProject(); - private static final UpdateMatcher enrichMissingPublicationDate = new EnrichMissingPublicationDate(); - private static final UpdateMatcher enrichMissingSubject = new EnrichMissingSubject(); - private static final UpdateMatcher enrichMoreOpenAccess = new EnrichMoreOpenAccess(); - private static final UpdateMatcher enrichMorePid = new EnrichMorePid(); - private static final UpdateMatcher enrichMoreSubject = new EnrichMoreSubject(); + // Simple Matchers + private static final UpdateMatcher enrichMissingAbstract = new EnrichMissingAbstract(); + private static final UpdateMatcher enrichMissingAuthorOrcid = new EnrichMissingAuthorOrcid(); + private static final UpdateMatcher enrichMissingOpenAccess = new EnrichMissingOpenAccess(); + private static final UpdateMatcher enrichMissingPid = new EnrichMissingPid(); + private static final UpdateMatcher enrichMissingPublicationDate = new EnrichMissingPublicationDate(); + private static final UpdateMatcher enrichMissingSubject = new EnrichMissingSubject(); + private static final UpdateMatcher enrichMoreOpenAccess = new EnrichMoreOpenAccess(); + private static final UpdateMatcher enrichMorePid = new EnrichMorePid(); + private static final UpdateMatcher enrichMoreSubject = new EnrichMoreSubject(); + + // Advanced matchers + private static final UpdateMatcher>, ?> enrichMissingProject = new EnrichMissingProject(); + private static final UpdateMatcher>, ?> enrichMoreProject = new EnrichMoreProject(); + + private static final UpdateMatcher>, ?> enrichMissingSoftware = new EnrichMissingSoftware(); + private static final UpdateMatcher>, ?> enrichMoreSoftware = new EnrichMoreSoftware(); + + private static final UpdateMatcher>, ?> enrichMisissingPublicationIsRelatedTo = new EnrichMissingPublicationIsRelatedTo(); + private static final UpdateMatcher>, ?> enrichMissingPublicationIsReferencedBy = new EnrichMissingPublicationIsReferencedBy(); + private static final UpdateMatcher>, ?> enrichMissingPublicationReferences = new EnrichMissingPublicationReferences(); + private static final UpdateMatcher>, ?> enrichMissingPublicationIsSupplementedTo = new EnrichMissingPublicationIsSupplementedTo(); + private static final UpdateMatcher>, ?> enrichMissingPublicationIsSupplementedBy = new EnrichMissingPublicationIsSupplementedBy(); + + private static final UpdateMatcher>, ?> enrichMisissingDatasetIsRelatedTo = new EnrichMissingDatasetIsRelatedTo(); + private static final UpdateMatcher>, ?> enrichMissingDatasetIsReferencedBy = new EnrichMissingDatasetIsReferencedBy(); + private static final UpdateMatcher>, ?> enrichMissingDatasetReferences = new EnrichMissingDatasetReferences(); + private static final UpdateMatcher>, ?> enrichMissingDatasetIsSupplementedTo = new EnrichMissingDatasetIsSupplementedTo(); + private static final UpdateMatcher>, ?> enrichMissingDatasetIsSupplementedBy = new EnrichMissingDatasetIsSupplementedBy(); public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); public static void main(final String[] args) throws Exception { final ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils - .toString(GenerateEventsApplication.class - .getResourceAsStream("/eu/dnetlib/dhp/oa/graph/merge_claims_parameters.json"))); + .toString( + GenerateEventsApplication.class + .getResourceAsStream("/eu/dnetlib/dhp/oa/graph/merge_claims_parameters.json"))); parser.parseArgument(args); final Boolean isSparkSessionManaged = Optional @@ -82,9 +120,6 @@ public class GenerateEventsApplication { final String eventsPath = parser.get("eventsPath"); log.info("eventsPath: {}", eventsPath); - final String resultClassName = parser.get("resultTableName"); - log.info("resultTableName: {}", resultClassName); - final SparkConf conf = new SparkConf(); runWithSparkSession(conf, isSparkSessionManaged, spark -> { @@ -111,17 +146,17 @@ public class GenerateEventsApplication { final String graphPath, final Class resultClazz) { - final Dataset results = - readPath(spark, graphPath + "/" + resultClazz.getSimpleName().toLowerCase(), resultClazz) + final Dataset results = readPath( + spark, graphPath + "/" + resultClazz.getSimpleName().toLowerCase(), resultClazz) .filter(r -> r.getDataInfo().getDeletedbyinference()); - final Dataset rels = - readPath(spark, graphPath + "/relation", Relation.class) - .filter(r -> r.getRelClass().equals("TODO")); // TODO mergedIN + final Dataset rels = readPath(spark, graphPath + "/relation", Relation.class) + .filter(r -> r.getRelClass().equals(BrokerConstants.IS_MERGED_IN_CLASS)); final Column c = null; // TODO - final Dataset aa = results.joinWith(rels, results.col("id").equalTo(rels.col("source")), "inner") + final Dataset aa = results + .joinWith(rels, results.col("id").equalTo(rels.col("source")), "inner") .groupBy(rels.col("target")) .agg(c) .filter(x -> x.size() > 1) @@ -134,7 +169,7 @@ public class GenerateEventsApplication { } - private List generateSimpleEvents(final Result... children) { + private List generateSimpleEvents(final Collection children) { final List> list = new ArrayList<>(); for (final Result target : children) { @@ -142,7 +177,6 @@ public class GenerateEventsApplication { list.addAll(enrichMissingAuthorOrcid.searchUpdatesForRecord(target, children)); list.addAll(enrichMissingOpenAccess.searchUpdatesForRecord(target, children)); list.addAll(enrichMissingPid.searchUpdatesForRecord(target, children)); - list.addAll(enrichMissingProject.searchUpdatesForRecord(target, children)); list.addAll(enrichMissingPublicationDate.searchUpdatesForRecord(target, children)); list.addAll(enrichMissingSubject.searchUpdatesForRecord(target, children)); list.addAll(enrichMoreOpenAccess.searchUpdatesForRecord(target, children)); @@ -153,6 +187,87 @@ public class GenerateEventsApplication { return list.stream().map(EventFactory::newBrokerEvent).collect(Collectors.toList()); } + private List generateProjectsEvents(final Collection>> childrenWithProjects) { + final List> list = new ArrayList<>(); + + for (final Pair> target : childrenWithProjects) { + list.addAll(enrichMissingProject.searchUpdatesForRecord(target, childrenWithProjects)); + list.addAll(enrichMoreProject.searchUpdatesForRecord(target, childrenWithProjects)); + } + + return list.stream().map(EventFactory::newBrokerEvent).collect(Collectors.toList()); + } + + private List generateSoftwareEvents(final Collection>> childrenWithSoftwares) { + final List> list = new ArrayList<>(); + + for (final Pair> target : childrenWithSoftwares) { + list.addAll(enrichMissingSoftware.searchUpdatesForRecord(target, childrenWithSoftwares)); + list.addAll(enrichMoreSoftware.searchUpdatesForRecord(target, childrenWithSoftwares)); + } + return list.stream().map(EventFactory::newBrokerEvent).collect(Collectors.toList()); + } + + private List generatePublicationRelatedEvents(final String relType, + final Collection>>> childrenWithRels) { + + final List> list = new ArrayList<>(); + + final List>> cleanedChildrens = childrenWithRels + .stream() + .filter(p -> p.getRight().containsKey(relType)) + .map(p -> Pair.of(p.getLeft(), p.getRight().get(relType))) + .filter(p -> p.getRight().size() > 0) + .collect(Collectors.toList()); + + for (final Pair> target : cleanedChildrens) { + if (relType.equals("isRelatedTo")) { + list.addAll(enrichMisissingPublicationIsRelatedTo.searchUpdatesForRecord(target, cleanedChildrens)); + } else if (relType.equals("references")) { + list.addAll(enrichMissingPublicationReferences.searchUpdatesForRecord(target, cleanedChildrens)); + } else if (relType.equals("isReferencedBy")) { + list.addAll(enrichMissingPublicationIsReferencedBy.searchUpdatesForRecord(target, cleanedChildrens)); + } else if (relType.equals("isSupplementedTo")) { + list.addAll(enrichMissingPublicationIsSupplementedTo.searchUpdatesForRecord(target, cleanedChildrens)); + } else if (relType.equals("isSupplementedBy")) { + list.addAll(enrichMissingPublicationIsSupplementedBy.searchUpdatesForRecord(target, cleanedChildrens)); + } + } + + return list.stream().map(EventFactory::newBrokerEvent).collect(Collectors.toList()); + + } + + private List generateDatasetRelatedEvents(final String relType, + final Collection>>> childrenWithRels) { + + final List> list = new ArrayList<>(); + + final List>> cleanedChildrens = childrenWithRels + .stream() + .filter(p -> p.getRight().containsKey(relType)) + .map(p -> Pair.of(p.getLeft(), p.getRight().get(relType))) + .filter(p -> p.getRight().size() > 0) + .collect(Collectors.toList()); + + for (final Pair> target : cleanedChildrens) { + if (relType.equals("isRelatedTo")) { + list.addAll(enrichMisissingDatasetIsRelatedTo.searchUpdatesForRecord(target, cleanedChildrens)); + } else if (relType.equals("references")) { + list.addAll(enrichMissingDatasetReferences.searchUpdatesForRecord(target, cleanedChildrens)); + } else if (relType.equals("isReferencedBy")) { + list.addAll(enrichMissingDatasetIsReferencedBy.searchUpdatesForRecord(target, cleanedChildrens)); + } else if (relType.equals("isSupplementedTo")) { + list.addAll(enrichMissingDatasetIsSupplementedTo.searchUpdatesForRecord(target, cleanedChildrens)); + } else if (relType.equals("isSupplementedBy")) { + list.addAll(enrichMissingDatasetIsSupplementedBy.searchUpdatesForRecord(target, cleanedChildrens)); + } + } + + return list.stream().map(EventFactory::newBrokerEvent).collect(Collectors.toList()); + + } + public static Dataset readPath( final SparkSession spark, final String inputPath, diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingAbstract.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingAbstract.java index 43cf738f8c..6dab6355fd 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingAbstract.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingAbstract.java @@ -9,7 +9,7 @@ import eu.dnetlib.dhp.broker.model.Topic; import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; import eu.dnetlib.dhp.schema.oaf.Result; -public class EnrichMissingAbstract extends UpdateMatcher { +public class EnrichMissingAbstract extends UpdateMatcher { public EnrichMissingAbstract() { super(false); @@ -24,7 +24,8 @@ public class EnrichMissingAbstract extends UpdateMatcher { } @Override - public UpdateInfo generateUpdateInfo(final String highlightValue, final Result source, + public UpdateInfo generateUpdateInfo(final String highlightValue, + final Result source, final Result target) { return new UpdateInfo<>( Topic.ENRICH_MISSING_ABSTRACT, diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingAuthorOrcid.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingAuthorOrcid.java index beeccdbe84..c7146ad79a 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingAuthorOrcid.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingAuthorOrcid.java @@ -10,7 +10,7 @@ import eu.dnetlib.dhp.broker.model.Topic; import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; import eu.dnetlib.dhp.schema.oaf.Result; -public class EnrichMissingAuthorOrcid extends UpdateMatcher> { +public class EnrichMissingAuthorOrcid extends UpdateMatcher> { public EnrichMissingAuthorOrcid() { super(true); @@ -24,7 +24,8 @@ public class EnrichMissingAuthorOrcid extends UpdateMatcher @Override public UpdateInfo> generateUpdateInfo(final Pair highlightValue, - final Result source, final Result target) { + final Result source, + final Result target) { return new UpdateInfo<>( Topic.ENRICH_MISSING_AUTHOR_ORCID, highlightValue, source, target, diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingDatasetIsReferencedBy.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingDatasetIsReferencedBy.java new file mode 100644 index 0000000000..3b9326fef2 --- /dev/null +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingDatasetIsReferencedBy.java @@ -0,0 +1,38 @@ + +package eu.dnetlib.dhp.broker.oa.matchers; + +import java.util.List; + +import org.apache.commons.lang3.tuple.Pair; + +import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; +import eu.dnetlib.dhp.schema.oaf.Dataset; +import eu.dnetlib.dhp.schema.oaf.Result; + +public class EnrichMissingDatasetIsReferencedBy + extends UpdateMatcher>, eu.dnetlib.broker.objects.Dataset> { + + public EnrichMissingDatasetIsReferencedBy() { + super(true); + } + + @Override + protected List> findUpdates(final Pair> source, + final Pair> target) { + // TODO Auto-generated method stub + return null; + } + + @Override + protected UpdateInfo generateUpdateInfo( + final eu.dnetlib.broker.objects.Dataset highlightValue, + final Pair> source, + final Pair> target) { + return new UpdateInfo<>( + Topic.ENRICH_MISSING_DATASET_IS_REFERENCED_BY, + highlightValue, source.getLeft(), target.getLeft(), + (p, rel) -> p.getDatasets().add(rel), + rel -> rel.getInstances().get(0).getUrl()); + } +} diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingDatasetIsRelatedTo.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingDatasetIsRelatedTo.java new file mode 100644 index 0000000000..35f7c52b42 --- /dev/null +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingDatasetIsRelatedTo.java @@ -0,0 +1,38 @@ + +package eu.dnetlib.dhp.broker.oa.matchers; + +import java.util.List; + +import org.apache.commons.lang3.tuple.Pair; + +import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; +import eu.dnetlib.dhp.schema.oaf.Dataset; +import eu.dnetlib.dhp.schema.oaf.Result; + +public class EnrichMissingDatasetIsRelatedTo + extends UpdateMatcher>, eu.dnetlib.broker.objects.Dataset> { + + public EnrichMissingDatasetIsRelatedTo() { + super(true); + } + + @Override + protected List> findUpdates(final Pair> source, + final Pair> target) { + // TODO Auto-generated method stub + return null; + } + + @Override + protected UpdateInfo generateUpdateInfo( + final eu.dnetlib.broker.objects.Dataset highlightValue, + final Pair> source, + final Pair> target) { + return new UpdateInfo<>( + Topic.ENRICH_MISSING_DATASET_IS_RELATED_TO, + highlightValue, source.getLeft(), target.getLeft(), + (p, rel) -> p.getDatasets().add(rel), + rel -> rel.getInstances().get(0).getUrl()); + } +} diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingDatasetIsSupplementedBy.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingDatasetIsSupplementedBy.java new file mode 100644 index 0000000000..1faa305b52 --- /dev/null +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingDatasetIsSupplementedBy.java @@ -0,0 +1,38 @@ + +package eu.dnetlib.dhp.broker.oa.matchers; + +import java.util.List; + +import org.apache.commons.lang3.tuple.Pair; + +import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; +import eu.dnetlib.dhp.schema.oaf.Dataset; +import eu.dnetlib.dhp.schema.oaf.Result; + +public class EnrichMissingDatasetIsSupplementedBy + extends UpdateMatcher>, eu.dnetlib.broker.objects.Dataset> { + + public EnrichMissingDatasetIsSupplementedBy() { + super(true); + } + + @Override + protected List> findUpdates(final Pair> source, + final Pair> target) { + // TODO Auto-generated method stub + return null; + } + + @Override + protected UpdateInfo generateUpdateInfo( + final eu.dnetlib.broker.objects.Dataset highlightValue, + final Pair> source, + final Pair> target) { + return new UpdateInfo<>( + Topic.ENRICH_MISSING_DATASET_IS_SUPPLEMENTED_BY, + highlightValue, source.getLeft(), target.getLeft(), + (p, rel) -> p.getDatasets().add(rel), + rel -> rel.getInstances().get(0).getUrl()); + } +} diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingDatasetIsSupplementedTo.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingDatasetIsSupplementedTo.java new file mode 100644 index 0000000000..d1b067272d --- /dev/null +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingDatasetIsSupplementedTo.java @@ -0,0 +1,38 @@ + +package eu.dnetlib.dhp.broker.oa.matchers; + +import java.util.List; + +import org.apache.commons.lang3.tuple.Pair; + +import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; +import eu.dnetlib.dhp.schema.oaf.Dataset; +import eu.dnetlib.dhp.schema.oaf.Result; + +public class EnrichMissingDatasetIsSupplementedTo + extends UpdateMatcher>, eu.dnetlib.broker.objects.Dataset> { + + public EnrichMissingDatasetIsSupplementedTo() { + super(true); + } + + @Override + protected List> findUpdates(final Pair> source, + final Pair> target) { + // TODO Auto-generated method stub + return null; + } + + @Override + protected UpdateInfo generateUpdateInfo( + final eu.dnetlib.broker.objects.Dataset highlightValue, + final Pair> source, + final Pair> target) { + return new UpdateInfo<>( + Topic.ENRICH_MISSING_DATASET_IS_SUPPLEMENTED_TO, + highlightValue, source.getLeft(), target.getLeft(), + (p, rel) -> p.getDatasets().add(rel), + rel -> rel.getInstances().get(0).getUrl()); + } +} diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingDatasetReferences.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingDatasetReferences.java new file mode 100644 index 0000000000..ce6adeba27 --- /dev/null +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingDatasetReferences.java @@ -0,0 +1,38 @@ + +package eu.dnetlib.dhp.broker.oa.matchers; + +import java.util.List; + +import org.apache.commons.lang3.tuple.Pair; + +import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; +import eu.dnetlib.dhp.schema.oaf.Dataset; +import eu.dnetlib.dhp.schema.oaf.Result; + +public class EnrichMissingDatasetReferences + extends UpdateMatcher>, eu.dnetlib.broker.objects.Dataset> { + + public EnrichMissingDatasetReferences() { + super(true); + } + + @Override + protected List> findUpdates(final Pair> source, + final Pair> target) { + // TODO Auto-generated method stub + return null; + } + + @Override + protected UpdateInfo generateUpdateInfo( + final eu.dnetlib.broker.objects.Dataset highlightValue, + final Pair> source, + final Pair> target) { + return new UpdateInfo<>( + Topic.ENRICH_MISSING_DATASET_REFERENCES, + highlightValue, source.getLeft(), target.getLeft(), + (p, rel) -> p.getDatasets().add(rel), + rel -> rel.getInstances().get(0).getUrl()); + } +} diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingOpenAccess.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingOpenAccess.java index a4a2ea0c6c..81263c6c3f 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingOpenAccess.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingOpenAccess.java @@ -12,7 +12,7 @@ import eu.dnetlib.dhp.broker.oa.util.ConversionUtils; import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; import eu.dnetlib.dhp.schema.oaf.Result; -public class EnrichMissingOpenAccess extends UpdateMatcher { +public class EnrichMissingOpenAccess extends UpdateMatcher { public EnrichMissingOpenAccess() { super(true); diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPid.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPid.java index a8df62541e..5f10bb4d9e 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPid.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPid.java @@ -11,7 +11,7 @@ import eu.dnetlib.dhp.broker.oa.util.ConversionUtils; import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; import eu.dnetlib.dhp.schema.oaf.Result; -public class EnrichMissingPid extends UpdateMatcher { +public class EnrichMissingPid extends UpdateMatcher { public EnrichMissingPid() { super(true); diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingProject.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingProject.java index b6e5b3b574..0197c99b1b 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingProject.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingProject.java @@ -4,30 +4,35 @@ package eu.dnetlib.dhp.broker.oa.matchers; import java.util.Arrays; import java.util.List; -import eu.dnetlib.broker.objects.Project; +import org.apache.commons.lang3.tuple.Pair; + import eu.dnetlib.dhp.broker.model.Topic; import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; +import eu.dnetlib.dhp.schema.oaf.Project; import eu.dnetlib.dhp.schema.oaf.Result; -public class EnrichMissingProject extends UpdateMatcher { +public class EnrichMissingProject + extends UpdateMatcher>, eu.dnetlib.broker.objects.Project> { public EnrichMissingProject() { super(true); } @Override - protected List> findUpdates(final Result source, final Result target) { - // return Arrays.asList(new EnrichMissingAbstract("xxxxxxx", 0.9f)); + protected List> findUpdates(final Pair> source, + final Pair> target) { + // TODO return Arrays.asList(); } @Override - public UpdateInfo generateUpdateInfo(final Project highlightValue, - final Result source, - final Result target) { + public UpdateInfo generateUpdateInfo( + final eu.dnetlib.broker.objects.Project highlightValue, + final Pair> source, + final Pair> target) { return new UpdateInfo<>( Topic.ENRICH_MISSING_PROJECT, - highlightValue, source, target, + highlightValue, source.getLeft(), target.getLeft(), (p, prj) -> p.getProjects().add(prj), prj -> prj.getFunder() + "::" + prj.getFundingProgram() + prj.getCode()); } diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPublicationDate.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPublicationDate.java index 372a4e4c9d..19ef2bab7e 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPublicationDate.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPublicationDate.java @@ -9,7 +9,7 @@ import eu.dnetlib.dhp.broker.model.Topic; import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; import eu.dnetlib.dhp.schema.oaf.Result; -public class EnrichMissingPublicationDate extends UpdateMatcher { +public class EnrichMissingPublicationDate extends UpdateMatcher { public EnrichMissingPublicationDate() { super(false); diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPublicationIsReferencedBy.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPublicationIsReferencedBy.java new file mode 100644 index 0000000000..8bcee5a1fc --- /dev/null +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPublicationIsReferencedBy.java @@ -0,0 +1,42 @@ + +package eu.dnetlib.dhp.broker.oa.matchers; + +import java.util.Arrays; +import java.util.List; + +import org.apache.commons.lang3.tuple.Pair; + +import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; +import eu.dnetlib.dhp.schema.oaf.Publication; +import eu.dnetlib.dhp.schema.oaf.Result; + +public class EnrichMissingPublicationIsReferencedBy + extends UpdateMatcher>, eu.dnetlib.broker.objects.Publication> { + + public EnrichMissingPublicationIsReferencedBy() { + super(true); + } + + @Override + protected List> findUpdates( + final Pair> source, + final Pair> target) { + // TODO Auto-generated method stub + return Arrays.asList(); + } + + @Override + protected UpdateInfo generateUpdateInfo( + final eu.dnetlib.broker.objects.Publication highlightValue, + final Pair> source, + final Pair> target) { + return new UpdateInfo<>( + Topic.ENRICH_MISSING_PUBLICATION_IS_REFERENCED_BY, + highlightValue, source.getLeft(), target.getLeft(), + (p, rel) -> { + }, // p.getPublications().add(rel), //TODO available in the future release of dnet-openaire-broker-common + rel -> rel.getOriginalId()); + } + +} diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPublicationIsRelatedTo.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPublicationIsRelatedTo.java new file mode 100644 index 0000000000..0c16f9f564 --- /dev/null +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPublicationIsRelatedTo.java @@ -0,0 +1,42 @@ + +package eu.dnetlib.dhp.broker.oa.matchers; + +import java.util.Arrays; +import java.util.List; + +import org.apache.commons.lang3.tuple.Pair; + +import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; +import eu.dnetlib.dhp.schema.oaf.Publication; +import eu.dnetlib.dhp.schema.oaf.Result; + +public class EnrichMissingPublicationIsRelatedTo + extends UpdateMatcher>, eu.dnetlib.broker.objects.Publication> { + + public EnrichMissingPublicationIsRelatedTo() { + super(true); + } + + @Override + protected List> findUpdates( + final Pair> source, + final Pair> target) { + // TODO Auto-generated method stub + return Arrays.asList(); + } + + @Override + protected UpdateInfo generateUpdateInfo( + final eu.dnetlib.broker.objects.Publication highlightValue, + final Pair> source, + final Pair> target) { + return new UpdateInfo<>( + Topic.ENRICH_MISSING_PUBLICATION_IS_RELATED_TO, + highlightValue, source.getLeft(), target.getLeft(), + (p, rel) -> { + }, // p.getPublications().add(rel), //TODO available in the future release of dnet-openaire-broker-common + rel -> rel.getOriginalId()); + } + +} diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPublicationIsSupplementedBy.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPublicationIsSupplementedBy.java new file mode 100644 index 0000000000..0b3c332708 --- /dev/null +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPublicationIsSupplementedBy.java @@ -0,0 +1,42 @@ + +package eu.dnetlib.dhp.broker.oa.matchers; + +import java.util.Arrays; +import java.util.List; + +import org.apache.commons.lang3.tuple.Pair; + +import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; +import eu.dnetlib.dhp.schema.oaf.Publication; +import eu.dnetlib.dhp.schema.oaf.Result; + +public class EnrichMissingPublicationIsSupplementedBy + extends UpdateMatcher>, eu.dnetlib.broker.objects.Publication> { + + public EnrichMissingPublicationIsSupplementedBy() { + super(true); + } + + @Override + protected List> findUpdates( + final Pair> source, + final Pair> target) { + // TODO Auto-generated method stub + return Arrays.asList(); + } + + @Override + protected UpdateInfo generateUpdateInfo( + final eu.dnetlib.broker.objects.Publication highlightValue, + final Pair> source, + final Pair> target) { + return new UpdateInfo<>( + Topic.ENRICH_MISSING_PUBLICATION_IS_SUPPLEMENTED_BY, + highlightValue, source.getLeft(), target.getLeft(), + (p, rel) -> { + }, // p.getPublications().add(rel), //TODO available in the future release of dnet-openaire-broker-common + rel -> rel.getOriginalId()); + } + +} diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPublicationIsSupplementedTo.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPublicationIsSupplementedTo.java new file mode 100644 index 0000000000..0e72a84239 --- /dev/null +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPublicationIsSupplementedTo.java @@ -0,0 +1,42 @@ + +package eu.dnetlib.dhp.broker.oa.matchers; + +import java.util.Arrays; +import java.util.List; + +import org.apache.commons.lang3.tuple.Pair; + +import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; +import eu.dnetlib.dhp.schema.oaf.Publication; +import eu.dnetlib.dhp.schema.oaf.Result; + +public class EnrichMissingPublicationIsSupplementedTo + extends UpdateMatcher>, eu.dnetlib.broker.objects.Publication> { + + public EnrichMissingPublicationIsSupplementedTo() { + super(true); + } + + @Override + protected List> findUpdates( + final Pair> source, + final Pair> target) { + // TODO Auto-generated method stub + return Arrays.asList(); + } + + @Override + protected UpdateInfo generateUpdateInfo( + final eu.dnetlib.broker.objects.Publication highlightValue, + final Pair> source, + final Pair> target) { + return new UpdateInfo<>( + Topic.ENRICH_MISSING_PUBLICATION_IS_SUPPLEMENTED_TO, + highlightValue, source.getLeft(), target.getLeft(), + (p, rel) -> { + }, // p.getPublications().add(rel), //TODO available in the future release of dnet-openaire-broker-common + rel -> rel.getOriginalId()); + } + +} diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPublicationReferences.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPublicationReferences.java new file mode 100644 index 0000000000..6d1124974e --- /dev/null +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPublicationReferences.java @@ -0,0 +1,42 @@ + +package eu.dnetlib.dhp.broker.oa.matchers; + +import java.util.Arrays; +import java.util.List; + +import org.apache.commons.lang3.tuple.Pair; + +import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; +import eu.dnetlib.dhp.schema.oaf.Publication; +import eu.dnetlib.dhp.schema.oaf.Result; + +public class EnrichMissingPublicationReferences + extends UpdateMatcher>, eu.dnetlib.broker.objects.Publication> { + + public EnrichMissingPublicationReferences() { + super(true); + } + + @Override + protected List> findUpdates( + final Pair> source, + final Pair> target) { + // TODO Auto-generated method stub + return Arrays.asList(); + } + + @Override + protected UpdateInfo generateUpdateInfo( + final eu.dnetlib.broker.objects.Publication highlightValue, + final Pair> source, + final Pair> target) { + return new UpdateInfo<>( + Topic.ENRICH_MISSING_PUBLICATION_REFERENCES, + highlightValue, source.getLeft(), target.getLeft(), + (p, rel) -> { + }, // p.getPublications().add(rel), //TODO available in the future release of dnet-openaire-broker-common + rel -> rel.getOriginalId()); + } + +} diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingSoftware.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingSoftware.java new file mode 100644 index 0000000000..954ee48be7 --- /dev/null +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingSoftware.java @@ -0,0 +1,41 @@ + +package eu.dnetlib.dhp.broker.oa.matchers; + +import java.util.Arrays; +import java.util.List; + +import org.apache.commons.lang3.tuple.Pair; + +import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; +import eu.dnetlib.dhp.schema.oaf.Result; +import eu.dnetlib.dhp.schema.oaf.Software; + +public class EnrichMissingSoftware + extends UpdateMatcher>, eu.dnetlib.broker.objects.Software> { + + public EnrichMissingSoftware() { + super(true); + } + + @Override + protected List> findUpdates( + final Pair> source, + final Pair> target) { + // TODO + return Arrays.asList(); + } + + @Override + public UpdateInfo generateUpdateInfo( + final eu.dnetlib.broker.objects.Software highlightValue, + final Pair> source, + final Pair> target) { + return new UpdateInfo<>( + Topic.ENRICH_MISSING_SOFTWARE, + highlightValue, source.getLeft(), target.getLeft(), + (p, s) -> p.getSoftwares().add(s), + s -> s.getName()); + } + +} diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingSubject.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingSubject.java index 79e9d469be..a7c72f6ea3 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingSubject.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingSubject.java @@ -14,7 +14,7 @@ import eu.dnetlib.dhp.schema.oaf.Qualifier; import eu.dnetlib.dhp.schema.oaf.Result; import eu.dnetlib.dhp.schema.oaf.StructuredProperty; -public class EnrichMissingSubject extends UpdateMatcher> { +public class EnrichMissingSubject extends UpdateMatcher> { public EnrichMissingSubject() { super(true); diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMoreOpenAccess.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMoreOpenAccess.java index 40c9b0500a..2cd2775c9e 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMoreOpenAccess.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMoreOpenAccess.java @@ -12,7 +12,7 @@ import eu.dnetlib.dhp.broker.oa.util.ConversionUtils; import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; import eu.dnetlib.dhp.schema.oaf.Result; -public class EnrichMoreOpenAccess extends UpdateMatcher { +public class EnrichMoreOpenAccess extends UpdateMatcher { public EnrichMoreOpenAccess() { super(true); diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMorePid.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMorePid.java index 0e7b7766ab..048d19747d 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMorePid.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMorePid.java @@ -11,7 +11,7 @@ import eu.dnetlib.dhp.broker.oa.util.ConversionUtils; import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; import eu.dnetlib.dhp.schema.oaf.Result; -public class EnrichMorePid extends UpdateMatcher { +public class EnrichMorePid extends UpdateMatcher { public EnrichMorePid() { super(true); diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMoreProject.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMoreProject.java new file mode 100644 index 0000000000..4bf45d9437 --- /dev/null +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMoreProject.java @@ -0,0 +1,39 @@ + +package eu.dnetlib.dhp.broker.oa.matchers; + +import java.util.Arrays; +import java.util.List; + +import org.apache.commons.lang3.tuple.Pair; + +import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; +import eu.dnetlib.dhp.schema.oaf.Project; +import eu.dnetlib.dhp.schema.oaf.Result; + +public class EnrichMoreProject extends UpdateMatcher>, eu.dnetlib.broker.objects.Project> { + + public EnrichMoreProject() { + super(true); + } + + @Override + protected List> findUpdates(final Pair> source, + final Pair> target) { + // TODO + return Arrays.asList(); + } + + @Override + public UpdateInfo generateUpdateInfo( + final eu.dnetlib.broker.objects.Project highlightValue, + final Pair> source, + final Pair> target) { + return new UpdateInfo<>( + Topic.ENRICH_MORE_PROJECT, + highlightValue, source.getLeft(), target.getLeft(), + (p, prj) -> p.getProjects().add(prj), + prj -> prj.getFunder() + "::" + prj.getFundingProgram() + prj.getCode()); + } + +} diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMoreSoftware.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMoreSoftware.java new file mode 100644 index 0000000000..9760504f64 --- /dev/null +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMoreSoftware.java @@ -0,0 +1,41 @@ + +package eu.dnetlib.dhp.broker.oa.matchers; + +import java.util.Arrays; +import java.util.List; + +import org.apache.commons.lang3.tuple.Pair; + +import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; +import eu.dnetlib.dhp.schema.oaf.Result; +import eu.dnetlib.dhp.schema.oaf.Software; + +public class EnrichMoreSoftware + extends UpdateMatcher>, eu.dnetlib.broker.objects.Software> { + + public EnrichMoreSoftware() { + super(true); + } + + @Override + protected List> findUpdates( + final Pair> source, + final Pair> target) { + // TODO + return Arrays.asList(); + } + + @Override + public UpdateInfo generateUpdateInfo( + final eu.dnetlib.broker.objects.Software highlightValue, + final Pair> source, + final Pair> target) { + return new UpdateInfo<>( + Topic.ENRICH_MORE_SOFTWARE, + highlightValue, source.getLeft(), target.getLeft(), + (p, s) -> p.getSoftwares().add(s), + s -> s.getName()); + } + +} diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMoreSubject.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMoreSubject.java index e6374479bf..67c2f01161 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMoreSubject.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMoreSubject.java @@ -12,7 +12,7 @@ import eu.dnetlib.dhp.broker.oa.util.ConversionUtils; import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; import eu.dnetlib.dhp.schema.oaf.Result; -public class EnrichMoreSubject extends UpdateMatcher> { +public class EnrichMoreSubject extends UpdateMatcher> { public EnrichMoreSubject() { super(true); diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcher.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcher.java index d91b03200e..5bfe108a56 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcher.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcher.java @@ -12,9 +12,8 @@ import org.apache.commons.lang3.StringUtils; import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; import eu.dnetlib.dhp.schema.oaf.Field; -import eu.dnetlib.dhp.schema.oaf.Result; -public abstract class UpdateMatcher { +public abstract class UpdateMatcher { private final boolean multipleUpdate; @@ -22,15 +21,16 @@ public abstract class UpdateMatcher { this.multipleUpdate = multipleUpdate; } - public Collection> searchUpdatesForRecord(final Result res, final Result... others) { + public Collection> searchUpdatesForRecord(final K res, final Collection others) { final Map> infoMap = new HashMap<>(); - for (final Result source : others) { + for (final K source : others) { if (source != res) { for (final UpdateInfo info : findUpdates(source, res)) { final String s = DigestUtils.md5Hex(info.getHighlightValueAsString()); - if (!infoMap.containsKey(s) || infoMap.get(s).getTrust() < info.getTrust()) {} else { + if (!infoMap.containsKey(s) || infoMap.get(s).getTrust() < info.getTrust()) { + } else { infoMap.put(s, info); } } @@ -51,11 +51,11 @@ public abstract class UpdateMatcher { } } - protected abstract List> findUpdates(Result source, Result target); + protected abstract List> findUpdates(K source, K target); protected abstract UpdateInfo generateUpdateInfo(final T highlightValue, - final Result source, - final Result target); + final K source, + final K target); protected static boolean isMissing(final List> list) { return list == null || list.isEmpty() || StringUtils.isBlank(list.get(0).getValue()); diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/BrokerConstants.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/BrokerConstants.java index d61d5bfb7d..8e97192bfb 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/BrokerConstants.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/BrokerConstants.java @@ -4,4 +4,6 @@ package eu.dnetlib.dhp.broker.oa.util; public class BrokerConstants { public final static String OPEN_ACCESS = "OPEN"; + public final static String IS_MERGED_IN_CLASS = "isMergedIn"; + } From b33dd58be4dbaeb841363c61d95f3f9bab890d25 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 22 May 2020 08:50:06 +0200 Subject: [PATCH 05/37] replaced parameter 'reuseRecords' with 'resumeFrom', allowing to restart the provision workflow execution from any step, useful for manual submissions or debugging --- .../dhp/oa/provision/oozie_app/workflow.xml | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/oozie_app/workflow.xml index fd8f5ba89a..6983ecf53c 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/oozie_app/workflow.xml @@ -84,7 +84,6 @@ sparkNetworkTimeout configures spark.network.timeout - @@ -98,12 +97,16 @@ - + - + - ${wf:conf('reuseRecords') eq false} - ${wf:conf('reuseRecords') eq true} + ${wf:conf('resumeFrom') eq 'prepare_relations'} + ${wf:conf('resumeFrom') eq 'fork_join_related_entities'} + ${wf:conf('resumeFrom') eq 'join_all_entities'} + ${wf:conf('resumeFrom') eq 'adjancency_lists'} + ${wf:conf('resumeFrom') eq 'convert_to_xml'} + ${wf:conf('resumeFrom') eq 'to_solr_index'} @@ -131,9 +134,7 @@ --inputRelationsPath${inputGraphRootPath}/relation --outputPath${workingDir}/relation - --relPartitions${relPartitions} - --relationFilter${relationFilter} - --maxRelations${maxRelations} + --relPartitions3000 @@ -340,7 +341,6 @@ - yarn @@ -362,7 +362,7 @@ --inputGraphRootPath${inputGraphRootPath} --inputRelatedEntitiesPath${workingDir}/join_partial --outputPath${workingDir}/join_entities - --numPartitions24000 + --numPartitions12000 @@ -386,7 +386,7 @@ --conf spark.sql.shuffle.partitions=7680 --conf spark.network.timeout=${sparkNetworkTimeout} - --inputPath${workingDir}/join_entities + --inputPath ${workingDir}/join_entities --outputPath${workingDir}/joined From 925d933204529190205d67c0f580ef58975575e1 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 22 May 2020 08:50:44 +0200 Subject: [PATCH 06/37] making XmlRecordFactory immune to graph encoding changes (mostly to avoid NPEs) --- .../oa/provision/utils/TemplateFactory.java | 3 +- .../oa/provision/utils/XmlRecordFactory.java | 77 +++++++++++++++---- 2 files changed, 62 insertions(+), 18 deletions(-) diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/TemplateFactory.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/TemplateFactory.java index 6cb025b4fd..21b526ab1b 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/TemplateFactory.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/TemplateFactory.java @@ -5,6 +5,7 @@ import static eu.dnetlib.dhp.oa.provision.utils.GraphMappingUtils.removePrefix; import static eu.dnetlib.dhp.oa.provision.utils.XmlSerializationUtils.escapeXml; import java.io.IOException; +import java.util.ArrayList; import java.util.Collection; import java.util.List; import java.util.stream.Collectors; @@ -95,7 +96,7 @@ public class TemplateFactory { .add("metadata", instancemetadata) .add( "webresources", - webresources + (webresources != null ? webresources : new ArrayList()) .stream() .filter(StringUtils::isNotBlank) .map(w -> getWebResource(w)) diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java index ce1c71312e..6f042b45cf 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java @@ -174,6 +174,7 @@ public class XmlRecordFactory implements Serializable { entity .getCollectedfrom() .stream() + .filter(XmlRecordFactory::kvNotBlank) .map(kv -> XmlSerializationUtils.mapKeyValue("collectedfrom", kv)) .collect(Collectors.toList())); } @@ -183,6 +184,7 @@ public class XmlRecordFactory implements Serializable { entity .getOriginalId() .stream() + .filter(Objects::nonNull) .map(s -> XmlSerializationUtils.asXmlElement("originalId", s)) .collect(Collectors.toList())); } @@ -192,6 +194,7 @@ public class XmlRecordFactory implements Serializable { entity .getPid() .stream() + .filter(Objects::nonNull) .map(p -> XmlSerializationUtils.mapStructuredProperty("pid", p)) .collect(Collectors.toList())); } @@ -213,6 +216,7 @@ public class XmlRecordFactory implements Serializable { r .getTitle() .stream() + .filter(Objects::nonNull) .map(t -> XmlSerializationUtils.mapStructuredProperty("title", t)) .collect(Collectors.toList())); } @@ -225,6 +229,7 @@ public class XmlRecordFactory implements Serializable { r .getAuthor() .stream() + .filter(Objects::nonNull) .map( a -> { final StringBuilder sb = new StringBuilder(" isNotBlank(sp.getQualifier().getClassid()) && isNotBlank(sp.getValue())) + .collect( + Collectors + .toMap( + p -> getAuthorPidType(p.getQualifier().getClassid()), + p -> p, + (p1, p2) -> p1)) + .values() .forEach( sp -> { - String pidType = XmlSerializationUtils - .escapeXml( - sp.getQualifier().getClassid()) - .replaceAll("\\W", ""); + String pidType = getAuthorPidType(sp.getQualifier().getClassid()); String pidValue = XmlSerializationUtils.escapeXml(sp.getValue()); - // ugly hack: some records - // provide swapped pidtype and - // pidvalue + // ugly hack: some records provide swapped pidtype and pidvalue if (authorPidTypes.contains(pidValue.toLowerCase().trim())) { sb.append(String.format(" %s=\"%s\"", pidValue, pidType)); } else { - pidType = pidType.replaceAll("\\W", "").replaceAll("\\d", ""); if (isNotBlank(pidType)) { sb .append( @@ -285,6 +292,7 @@ public class XmlRecordFactory implements Serializable { r .getContributor() .stream() + .filter(Objects::nonNull) .map(c -> XmlSerializationUtils.asXmlElement("contributor", c.getValue())) .collect(Collectors.toList())); } @@ -294,6 +302,7 @@ public class XmlRecordFactory implements Serializable { r .getCountry() .stream() + .filter(Objects::nonNull) .map(c -> XmlSerializationUtils.mapQualifier("country", c)) .collect(Collectors.toList())); } @@ -303,6 +312,7 @@ public class XmlRecordFactory implements Serializable { r .getCoverage() .stream() + .filter(Objects::nonNull) .map(c -> XmlSerializationUtils.asXmlElement("coverage", c.getValue())) .collect(Collectors.toList())); } @@ -319,6 +329,7 @@ public class XmlRecordFactory implements Serializable { r .getDescription() .stream() + .filter(Objects::nonNull) .map(c -> XmlSerializationUtils.asXmlElement("description", c.getValue())) .collect(Collectors.toList())); } @@ -333,6 +344,7 @@ public class XmlRecordFactory implements Serializable { r .getSubject() .stream() + .filter(Objects::nonNull) .map(s -> XmlSerializationUtils.mapStructuredProperty("subject", s)) .collect(Collectors.toList())); } @@ -345,6 +357,7 @@ public class XmlRecordFactory implements Serializable { r .getRelevantdate() .stream() + .filter(Objects::nonNull) .map(s -> XmlSerializationUtils.mapStructuredProperty("relevantdate", s)) .collect(Collectors.toList())); } @@ -357,6 +370,7 @@ public class XmlRecordFactory implements Serializable { r .getSource() .stream() + .filter(Objects::nonNull) .map(c -> XmlSerializationUtils.asXmlElement("source", c.getValue())) .collect(Collectors.toList())); } @@ -366,6 +380,7 @@ public class XmlRecordFactory implements Serializable { r .getFormat() .stream() + .filter(Objects::nonNull) .map(c -> XmlSerializationUtils.asXmlElement("format", c.getValue())) .collect(Collectors.toList())); } @@ -429,6 +444,7 @@ public class XmlRecordFactory implements Serializable { orp .getContactperson() .stream() + .filter(Objects::nonNull) .map(c -> XmlSerializationUtils.asXmlElement("contactperson", c.getValue())) .collect(Collectors.toList())); } @@ -439,6 +455,7 @@ public class XmlRecordFactory implements Serializable { orp .getContactgroup() .stream() + .filter(Objects::nonNull) .map(c -> XmlSerializationUtils.asXmlElement("contactgroup", c.getValue())) .collect(Collectors.toList())); } @@ -448,6 +465,7 @@ public class XmlRecordFactory implements Serializable { orp .getTool() .stream() + .filter(Objects::nonNull) .map(c -> XmlSerializationUtils.asXmlElement("tool", c.getValue())) .collect(Collectors.toList())); } @@ -461,6 +479,7 @@ public class XmlRecordFactory implements Serializable { s .getDocumentationUrl() .stream() + .filter(Objects::nonNull) .map(c -> XmlSerializationUtils.asXmlElement("documentationUrl", c.getValue())) .collect(Collectors.toList())); } @@ -470,6 +489,7 @@ public class XmlRecordFactory implements Serializable { s .getLicense() .stream() + .filter(Objects::nonNull) .map(l -> XmlSerializationUtils.mapStructuredProperty("license", l)) .collect(Collectors.toList())); } @@ -576,6 +596,7 @@ public class XmlRecordFactory implements Serializable { ds .getOdlanguages() .stream() + .filter(Objects::nonNull) .map(c -> XmlSerializationUtils.asXmlElement("odlanguages", c.getValue())) .collect(Collectors.toList())); } @@ -585,6 +606,7 @@ public class XmlRecordFactory implements Serializable { ds .getOdcontenttypes() .stream() + .filter(Objects::nonNull) .map(c -> XmlSerializationUtils.asXmlElement("odcontenttypes", c.getValue())) .collect(Collectors.toList())); } @@ -697,6 +719,7 @@ public class XmlRecordFactory implements Serializable { ds .getPolicies() .stream() + .filter(XmlRecordFactory::kvNotBlank) .map(kv -> XmlSerializationUtils.mapKeyValue("policies", kv)) .collect(Collectors.toList())); } @@ -709,6 +732,7 @@ public class XmlRecordFactory implements Serializable { ds .getSubjects() .stream() + .filter(Objects::nonNull) .map(sp -> XmlSerializationUtils.mapStructuredProperty("subjects", sp)) .collect(Collectors.toList())); } @@ -735,6 +759,7 @@ public class XmlRecordFactory implements Serializable { o .getAlternativeNames() .stream() + .filter(Objects::nonNull) .map(c -> XmlSerializationUtils.asXmlElement("alternativeNames", c.getValue())) .collect(Collectors.toList())); } @@ -862,6 +887,7 @@ public class XmlRecordFactory implements Serializable { p .getSubjects() .stream() + .filter(Objects::nonNull) .map(sp -> XmlSerializationUtils.mapStructuredProperty("subject", sp)) .collect(Collectors.toList())); } @@ -912,7 +938,12 @@ public class XmlRecordFactory implements Serializable { if (p.getFundingtree() != null) { metadata .addAll( - p.getFundingtree().stream().map(ft -> ft.getValue()).collect(Collectors.toList())); + p + .getFundingtree() + .stream() + .filter(Objects::nonNull) + .map(ft -> ft.getValue()) + .collect(Collectors.toList())); } break; @@ -923,6 +954,17 @@ public class XmlRecordFactory implements Serializable { return metadata; } + private String getAuthorPidType(String s) { + return XmlSerializationUtils + .escapeXml(s) + .replaceAll("\\W", "") + .replaceAll("\\d", ""); + } + + private static boolean kvNotBlank(KeyValue kv) { + return kv != null && StringUtils.isNotBlank(kv.getKey()) && StringUtils.isNotBlank(kv.getValue()); + } + private void mapDatasourceType(List metadata, final Qualifier dsType) { metadata.add(XmlSerializationUtils.mapQualifier("datasourcetype", dsType)); @@ -960,7 +1002,7 @@ public class XmlRecordFactory implements Serializable { .add( XmlSerializationUtils.asXmlElement("coderepositoryurl", re.getCodeRepositoryUrl())); } - if (re.getResulttype() != null & re.getResulttype().isBlank()) { + if (re.getResulttype() != null && re.getResulttype().isBlank()) { metadata.add(XmlSerializationUtils.mapQualifier("resulttype", re.getResulttype())); } if (re.getCollectedfrom() != null) { @@ -969,6 +1011,7 @@ public class XmlRecordFactory implements Serializable { re .getCollectedfrom() .stream() + .filter(XmlRecordFactory::kvNotBlank) .map(kv -> XmlSerializationUtils.mapKeyValue("collectedfrom", kv)) .collect(Collectors.toList())); } @@ -986,10 +1029,10 @@ public class XmlRecordFactory implements Serializable { if (isNotBlank(re.getOfficialname())) { metadata.add(XmlSerializationUtils.asXmlElement("officialname", re.getOfficialname())); } - if (re.getDatasourcetype() != null & !re.getDatasourcetype().isBlank()) { + if (re.getDatasourcetype() != null && !re.getDatasourcetype().isBlank()) { mapDatasourceType(metadata, re.getDatasourcetype()); } - if (re.getOpenairecompatibility() != null & !re.getOpenairecompatibility().isBlank()) { + if (re.getOpenairecompatibility() != null && !re.getOpenairecompatibility().isBlank()) { metadata .add( XmlSerializationUtils @@ -1006,7 +1049,7 @@ public class XmlRecordFactory implements Serializable { .add( XmlSerializationUtils.asXmlElement("legalshortname", re.getLegalshortname())); } - if (re.getCountry() != null & !re.getCountry().isBlank()) { + if (re.getCountry() != null && !re.getCountry().isBlank()) { metadata.add(XmlSerializationUtils.mapQualifier("country", re.getCountry())); } break; @@ -1020,10 +1063,10 @@ public class XmlRecordFactory implements Serializable { if (isNotBlank(re.getAcronym())) { metadata.add(XmlSerializationUtils.asXmlElement("acronym", re.getAcronym())); } - if (re.getContracttype() != null & !re.getContracttype().isBlank()) { + if (re.getContracttype() != null && !re.getContracttype().isBlank()) { metadata.add(XmlSerializationUtils.mapQualifier("contracttype", re.getContracttype())); } - if (re.getFundingtree() != null & contexts != null) { + if (re.getFundingtree() != null && contexts != null) { metadata .addAll( re @@ -1091,12 +1134,12 @@ public class XmlRecordFactory implements Serializable { .add( XmlSerializationUtils.mapQualifier("accessright", instance.getAccessright())); } - if (instance.getCollectedfrom() != null) { + if (instance.getCollectedfrom() != null && kvNotBlank(instance.getCollectedfrom())) { fields .add( XmlSerializationUtils.mapKeyValue("collectedfrom", instance.getCollectedfrom())); } - if (instance.getHostedby() != null) { + if (instance.getHostedby() != null && kvNotBlank(instance.getHostedby())) { fields.add(XmlSerializationUtils.mapKeyValue("hostedby", instance.getHostedby())); } if (instance.getDateofacceptance() != null From c5f7e173481f7b761a56742067454f58ff0b4ace Mon Sep 17 00:00:00 2001 From: "michele.artini" Date: Fri, 22 May 2020 10:08:02 +0200 Subject: [PATCH 07/37] author fullnames --- .../dhp/oa/graph/raw/OdfToOafMapper.java | 126 ++++++++---------- 1 file changed, 55 insertions(+), 71 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java index 30b980c422..3a56aa8e0e 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java @@ -4,16 +4,32 @@ package eu.dnetlib.dhp.oa.graph.raw; import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.createOpenaireId; import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.field; import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.structuredProperty; -import static eu.dnetlib.dhp.schema.common.ModelConstants.*; +import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_ACCESS_MODES; +import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_DATA_CITE_DATE; +import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_DATA_CITE_RESOURCE; +import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_LANGUAGES; +import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_PID_TYPES; +import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_PUBLICATION_RESOURCE; +import static eu.dnetlib.dhp.schema.common.ModelConstants.HAS_PARTS; +import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_PART_OF; +import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_SUPPLEMENTED_BY; +import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_SUPPLEMENT_TO; +import static eu.dnetlib.dhp.schema.common.ModelConstants.PART; +import static eu.dnetlib.dhp.schema.common.ModelConstants.RESULT_RESULT; +import static eu.dnetlib.dhp.schema.common.ModelConstants.SUPPLEMENT; -import java.util.*; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; import org.apache.commons.lang3.StringUtils; import org.dom4j.Document; import org.dom4j.Node; import eu.dnetlib.dhp.oa.graph.raw.common.PacePerson; -import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.Author; import eu.dnetlib.dhp.schema.oaf.DataInfo; import eu.dnetlib.dhp.schema.oaf.Field; @@ -22,7 +38,6 @@ import eu.dnetlib.dhp.schema.oaf.Instance; import eu.dnetlib.dhp.schema.oaf.KeyValue; import eu.dnetlib.dhp.schema.oaf.Oaf; import eu.dnetlib.dhp.schema.oaf.Qualifier; -import eu.dnetlib.dhp.schema.oaf.Relation; import eu.dnetlib.dhp.schema.oaf.StructuredProperty; public class OdfToOafMapper extends AbstractMdRecordToOafMapper { @@ -48,7 +63,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { final String fullname = n.valueOf("./datacite:creatorName"); author.setFullname(fullname); - PacePerson pp = new PacePerson(fullname, false); + final PacePerson pp = new PacePerson(fullname, false); final String name = n.valueOf("./datacite:givenName"); if (StringUtils.isBlank(name) & pp.isAccurate()) { author.setName(pp.getNormalisedFirstName()); @@ -63,6 +78,10 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { author.setSurname(surname); } + if (StringUtils.isBlank(author.getFullname())) { + author.setFullname(String.format("%s, %s", author.getSurname(), author.getName())); + } + author.setAffiliation(prepareListFields(n, "./datacite:affiliation", info)); author.setPid(preparePids(n, info)); author.setRank(pos++); @@ -75,12 +94,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { final List res = new ArrayList<>(); for (final Object o : n.selectNodes("./datacite:nameIdentifier")) { res - .add( - structuredProperty( - ((Node) o).getText(), - prepareQualifier( - (Node) o, "./@nameIdentifierScheme", DNET_PID_TYPES, DNET_PID_TYPES), - info)); + .add(structuredProperty(((Node) o).getText(), prepareQualifier((Node) o, "./@nameIdentifierScheme", DNET_PID_TYPES, DNET_PID_TYPES), info)); } return res; } @@ -94,22 +108,18 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { final Instance instance = new Instance(); instance - .setInstancetype( - prepareQualifier( - doc, "//dr:CobjCategory", DNET_PUBLICATION_RESOURCE, DNET_PUBLICATION_RESOURCE)); + .setInstancetype(prepareQualifier(doc, "//dr:CobjCategory", DNET_PUBLICATION_RESOURCE, DNET_PUBLICATION_RESOURCE)); instance.setCollectedfrom(collectedfrom); instance.setHostedby(hostedby); instance.setDateofacceptance(field(doc.valueOf("//oaf:dateAccepted"), info)); instance.setDistributionlocation(doc.valueOf("//oaf:distributionlocation")); instance - .setAccessright( - prepareQualifier(doc, "//oaf:accessrights", DNET_ACCESS_MODES, DNET_ACCESS_MODES)); + .setAccessright(prepareQualifier(doc, "//oaf:accessrights", DNET_ACCESS_MODES, DNET_ACCESS_MODES)); instance.setLicense(field(doc.valueOf("//oaf:license"), info)); instance.setRefereed(field(doc.valueOf("//oaf:refereed"), info)); instance.setProcessingchargeamount(field(doc.valueOf("//oaf:processingchargeamount"), info)); instance - .setProcessingchargecurrency( - field(doc.valueOf("//oaf:processingchargeamount/@currency"), info)); + .setProcessingchargecurrency(field(doc.valueOf("//oaf:processingchargeamount/@currency"), info)); final Set url = new HashSet<>(); for (final Object o : doc.selectNodes("//datacite:alternateIdentifier[@alternateIdentifierType='URL']")) { @@ -147,14 +157,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { && !dateType.equalsIgnoreCase("Updated") && !dateType.equalsIgnoreCase("Available")) { res - .add( - structuredProperty( - ((Node) o).getText(), - "UNKNOWN", - "UNKNOWN", - DNET_DATA_CITE_DATE, - DNET_DATA_CITE_DATE, - info)); + .add(structuredProperty(((Node) o).getText(), "UNKNOWN", "UNKNOWN", DNET_DATA_CITE_DATE, DNET_DATA_CITE_DATE, info)); } } return res; @@ -197,53 +200,49 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { @Override protected List> prepareOtherResearchProductTools( - final Document doc, final DataInfo info) { + final Document doc, + final DataInfo info) { return new ArrayList<>(); // Not present in ODF ??? } @Override protected List> prepareOtherResearchProductContactGroups( - final Document doc, final DataInfo info) { - return prepareListFields( - doc, - "//datacite:contributor[@contributorType='ContactGroup']/datacite:contributorName", - info); + final Document doc, + final DataInfo info) { + return prepareListFields(doc, "//datacite:contributor[@contributorType='ContactGroup']/datacite:contributorName", info); } @Override protected List> prepareOtherResearchProductContactPersons( - final Document doc, final DataInfo info) { - return prepareListFields( - doc, - "//datacite:contributor[@contributorType='ContactPerson']/datacite:contributorName", - info); + final Document doc, + final DataInfo info) { + return prepareListFields(doc, "//datacite:contributor[@contributorType='ContactPerson']/datacite:contributorName", info); } @Override protected Qualifier prepareSoftwareProgrammingLanguage(final Document doc, final DataInfo info) { - return prepareQualifier( - doc, "//datacite:format", "dnet:programming_languages", "dnet:programming_languages"); + return prepareQualifier(doc, "//datacite:format", "dnet:programming_languages", "dnet:programming_languages"); } @Override protected Field prepareSoftwareCodeRepositoryUrl( - final Document doc, final DataInfo info) { + final Document doc, + final DataInfo info) { return null; // Not present in ODF ??? } @Override protected List prepareSoftwareLicenses( - final Document doc, final DataInfo info) { + final Document doc, + final DataInfo info) { return new ArrayList<>(); // Not present in ODF ??? } @Override protected List> prepareSoftwareDocumentationUrls( - final Document doc, final DataInfo info) { - return prepareListFields( - doc, - "//datacite:relatedIdentifier[@relatedIdentifierType='URL' and @relationType='IsDocumentedBy']", - info); + final Document doc, + final DataInfo info) { + return prepareListFields(doc, "//datacite:relatedIdentifier[@relatedIdentifierType='URL' and @relationType='IsDocumentedBy']", info); } // DATASETS @@ -264,13 +263,15 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { @Override protected Field prepareDatasetMetadataVersionNumber( - final Document doc, final DataInfo info) { + final Document doc, + final DataInfo info) { return null; // Not present in ODF ??? } @Override protected Field prepareDatasetLastMetadataUpdate( - final Document doc, final DataInfo info) { + final Document doc, + final DataInfo info) { return prepareField(doc, "//datacite:date[@dateType='Updated']", info); } @@ -315,29 +316,16 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { if (type.equalsIgnoreCase("IsSupplementTo")) { res - .add( - getRelation( - docId, otherId, RESULT_RESULT, SUPPLEMENT, IS_SUPPLEMENT_TO, collectedFrom, info, - lastUpdateTimestamp)); + .add(getRelation(docId, otherId, RESULT_RESULT, SUPPLEMENT, IS_SUPPLEMENT_TO, collectedFrom, info, lastUpdateTimestamp)); res - .add( - getRelation( - otherId, docId, RESULT_RESULT, SUPPLEMENT, IS_SUPPLEMENTED_BY, collectedFrom, info, - lastUpdateTimestamp)); + .add(getRelation(otherId, docId, RESULT_RESULT, SUPPLEMENT, IS_SUPPLEMENTED_BY, collectedFrom, info, lastUpdateTimestamp)); } else if (type.equals("IsPartOf")) { res - .add( - getRelation( - docId, otherId, RESULT_RESULT, PART, IS_PART_OF, collectedFrom, info, - lastUpdateTimestamp)); + .add(getRelation(docId, otherId, RESULT_RESULT, PART, IS_PART_OF, collectedFrom, info, lastUpdateTimestamp)); res - .add( - getRelation( - otherId, docId, RESULT_RESULT, PART, HAS_PARTS, collectedFrom, info, - lastUpdateTimestamp)); - } else { - } + .add(getRelation(otherId, docId, RESULT_RESULT, PART, HAS_PARTS, collectedFrom, info, lastUpdateTimestamp)); + } else {} } } return res; @@ -345,10 +333,6 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { @Override protected Qualifier prepareResourceType(final Document doc, final DataInfo info) { - return prepareQualifier( - doc, - "//*[local-name() = 'resource']//*[local-name() = 'resourceType']", - DNET_DATA_CITE_RESOURCE, - DNET_DATA_CITE_RESOURCE); + return prepareQualifier(doc, "//*[local-name() = 'resource']//*[local-name() = 'resourceType']", DNET_DATA_CITE_RESOURCE, DNET_DATA_CITE_RESOURCE); } } From 9de71e54a8ae4f8bab8c78b5da4d790be711918e Mon Sep 17 00:00:00 2001 From: "michele.artini" Date: Fri, 22 May 2020 10:47:39 +0200 Subject: [PATCH 08/37] filter ORCID e MAG identifiers --- .../dhp/oa/graph/raw/OdfToOafMapper.java | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java index 3a56aa8e0e..1d4f80894e 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java @@ -3,6 +3,7 @@ package eu.dnetlib.dhp.oa.graph.raw; import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.createOpenaireId; import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.field; +import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.qualifier; import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.structuredProperty; import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_ACCESS_MODES; import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_DATA_CITE_DATE; @@ -44,6 +45,9 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { public static final String HTTP_DX_DOI_PREIFX = "http://dx.doi.org/"; + public static final Qualifier ORCID_PID_TYPE = qualifier("ORCID", "Open Researcher and Contributor ID", DNET_PID_TYPES, DNET_PID_TYPES); + public static final Qualifier MAG_PID_TYPE = qualifier("MAGIdentifier", "Microsoft Academic Graph Identifier", DNET_PID_TYPES, DNET_PID_TYPES); + public OdfToOafMapper(final Map code2name) { super(code2name); } @@ -93,8 +97,19 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { private List preparePids(final Node n, final DataInfo info) { final List res = new ArrayList<>(); for (final Object o : n.selectNodes("./datacite:nameIdentifier")) { - res - .add(structuredProperty(((Node) o).getText(), prepareQualifier((Node) o, "./@nameIdentifierScheme", DNET_PID_TYPES, DNET_PID_TYPES), info)); + + final String id = ((Node) o).getText(); + final String type = ((Node) o).valueOf("./@nameIdentifierScheme") + .trim() + .toUpperCase() + .replaceAll(" ", ""); + + if (type.startsWith("ORCID")) { + final String cleanedId = id.replaceAll("http://orcid.org/", "").replaceAll("https://orcid.org/", ""); + res.add(structuredProperty(cleanedId, ORCID_PID_TYPE, info)); + } else if (type.startsWith("MAGID")) { + res.add(structuredProperty(id, MAG_PID_TYPE, info)); + } } return res; } From 9f2d0f1b0893ba8fdba8ceef27f631a83c2aaa15 Mon Sep 17 00:00:00 2001 From: "michele.artini" Date: Fri, 22 May 2020 11:00:27 +0200 Subject: [PATCH 09/37] filter ORCID e MAG identifiers --- .../dhp/oa/graph/raw/OdfToOafMapper.java | 3 ++- .../dnetlib/dhp/oa/graph/raw/MappersTest.java | 23 ++++++++++++------- 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java index 1d4f80894e..d51433a182 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java @@ -102,7 +102,8 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { final String type = ((Node) o).valueOf("./@nameIdentifierScheme") .trim() .toUpperCase() - .replaceAll(" ", ""); + .replaceAll(" ", "") + .replaceAll("_", ""); if (type.startsWith("ORCID")) { final String cleanedId = id.replaceAll("http://orcid.org/", "").replaceAll("https://orcid.org/", ""); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java index 5a006e3513..6ff76e839a 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java @@ -21,7 +21,14 @@ import org.mockito.Mock; import org.mockito.junit.jupiter.MockitoExtension; import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.Author; +import eu.dnetlib.dhp.schema.oaf.Dataset; +import eu.dnetlib.dhp.schema.oaf.Field; +import eu.dnetlib.dhp.schema.oaf.Oaf; +import eu.dnetlib.dhp.schema.oaf.Publication; +import eu.dnetlib.dhp.schema.oaf.Relation; +import eu.dnetlib.dhp.schema.oaf.Software; +import eu.dnetlib.dhp.schema.oaf.StructuredProperty; @ExtendWith(MockitoExtension.class) public class MappersTest { @@ -54,13 +61,13 @@ public class MappersTest { assertTrue(StringUtils.isNotBlank(p.getTitle().get(0).getValue())); assertTrue(p.getAuthor().size() > 0); - Optional author = p + final Optional author = p .getAuthor() .stream() .filter(a -> a.getPid() != null && !a.getPid().isEmpty()) .findFirst(); assertTrue(author.isPresent()); - StructuredProperty pid = author + final StructuredProperty pid = author .get() .getPid() .stream() @@ -121,13 +128,13 @@ public class MappersTest { assertTrue(StringUtils.isNotBlank(d.getTitle().get(0).getValue())); assertTrue(d.getAuthor().size() > 0); - Optional author = d + final Optional author = d .getAuthor() .stream() .filter(a -> a.getPid() != null && !a.getPid().isEmpty()) .findFirst(); assertTrue(author.isPresent()); - StructuredProperty pid = author + final StructuredProperty pid = author .get() .getPid() .stream() @@ -135,7 +142,7 @@ public class MappersTest { .get(); assertEquals("0000-0001-9074-1619", pid.getValue()); assertEquals("ORCID", pid.getQualifier().getClassid()); - assertEquals("ORCID", pid.getQualifier().getClassname()); + assertEquals("Open Researcher and Contributor ID", pid.getQualifier().getClassname()); assertEquals(ModelConstants.DNET_PID_TYPES, pid.getQualifier().getSchemeid()); assertEquals(ModelConstants.DNET_PID_TYPES, pid.getQualifier().getSchemename()); assertEquals("Baracchini, Theo", author.get().getFullname()); @@ -143,13 +150,13 @@ public class MappersTest { assertEquals("Theo", author.get().getName()); assertEquals(1, author.get().getAffiliation().size()); - Optional> opAff = author + final Optional> opAff = author .get() .getAffiliation() .stream() .findFirst(); assertTrue(opAff.isPresent()); - Field affiliation = opAff.get(); + final Field affiliation = opAff.get(); assertEquals("ISTI-CNR", affiliation.getValue()); assertTrue(d.getSubject().size() > 0); From dc4621b3cb98ab01185e350e99f35091b628d0c5 Mon Sep 17 00:00:00 2001 From: "michele.artini" Date: Fri, 22 May 2020 12:25:01 +0200 Subject: [PATCH 10/37] filter ORCID e MAG identifiers --- .../raw/AbstractMdRecordToOafMapper.java | 223 ++++++++---------- .../dhp/oa/graph/raw/OafToOafMapper.java | 113 +++++---- .../dhp/oa/graph/raw/OdfToOafMapper.java | 5 - .../dnetlib/dhp/oa/graph/raw/MappersTest.java | 2 +- 4 files changed, 169 insertions(+), 174 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java index b9c4e6c804..84b200c076 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java @@ -10,7 +10,16 @@ import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.listFields; import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.oaiIProvenance; import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.qualifier; import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.structuredProperty; -import static eu.dnetlib.dhp.schema.common.ModelConstants.*; +import static eu.dnetlib.dhp.schema.common.ModelConstants.DATASET_DEFAULT_RESULTTYPE; +import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_PID_TYPES; +import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_PRODUCED_BY; +import static eu.dnetlib.dhp.schema.common.ModelConstants.ORP_DEFAULT_RESULTTYPE; +import static eu.dnetlib.dhp.schema.common.ModelConstants.OUTCOME; +import static eu.dnetlib.dhp.schema.common.ModelConstants.PRODUCES; +import static eu.dnetlib.dhp.schema.common.ModelConstants.PUBLICATION_DEFAULT_RESULTTYPE; +import static eu.dnetlib.dhp.schema.common.ModelConstants.REPOSITORY_PROVENANCE_ACTIONS; +import static eu.dnetlib.dhp.schema.common.ModelConstants.RESULT_PROJECT; +import static eu.dnetlib.dhp.schema.common.ModelConstants.SOFTWARE_DEFAULT_RESULTTYPE; import java.util.ArrayList; import java.util.Arrays; @@ -50,6 +59,8 @@ public abstract class AbstractMdRecordToOafMapper { protected static final String DATACITE_SCHEMA_KERNEL_4 = "http://datacite.org/schema/kernel-4"; protected static final String DATACITE_SCHEMA_KERNEL_3 = "http://datacite.org/schema/kernel-3"; + protected static final Qualifier ORCID_PID_TYPE = qualifier("ORCID", "Open Researcher and Contributor ID", DNET_PID_TYPES, DNET_PID_TYPES); + protected static final Qualifier MAG_PID_TYPE = qualifier("MAGIdentifier", "Microsoft Academic Graph Identifier", DNET_PID_TYPES, DNET_PID_TYPES); protected static final Map nsContext = new HashMap<>(); @@ -63,8 +74,7 @@ public abstract class AbstractMdRecordToOafMapper { nsContext.put("datacite", DATACITE_SCHEMA_KERNEL_3); } - protected static final Qualifier MAIN_TITLE_QUALIFIER = qualifier( - "main title", "main title", "dnet:dataCite_title", "dnet:dataCite_title"); + protected static final Qualifier MAIN_TITLE_QUALIFIER = qualifier("main title", "main title", "dnet:dataCite_title", "dnet:dataCite_title"); protected AbstractMdRecordToOafMapper(final Map code2name) { this.code2name = code2name; @@ -75,24 +85,18 @@ public abstract class AbstractMdRecordToOafMapper { DocumentFactory.getInstance().setXPathNamespaceURIs(nsContext); final Document doc = DocumentHelper - .parseText( - xml.replaceAll(DATACITE_SCHEMA_KERNEL_4, DATACITE_SCHEMA_KERNEL_3)); + .parseText(xml.replaceAll(DATACITE_SCHEMA_KERNEL_4, DATACITE_SCHEMA_KERNEL_3)); final String type = doc.valueOf("//dr:CobjCategory/@type"); - final KeyValue collectedFrom = getProvenanceDatasource( - doc, "//oaf:collectedFrom/@id", "//oaf:collectedFrom/@name"); + final KeyValue collectedFrom = getProvenanceDatasource(doc, "//oaf:collectedFrom/@id", "//oaf:collectedFrom/@name"); - if (collectedFrom == null) { - return null; - } + if (collectedFrom == null) { return null; } final KeyValue hostedBy = StringUtils.isBlank(doc.valueOf("//oaf:hostedBy/@id")) ? collectedFrom : getProvenanceDatasource(doc, "//oaf:hostedBy/@id", "//oaf:hostedBy/@name"); - if (hostedBy == null) { - return null; - } + if (hostedBy == null) { return null; } final DataInfo info = prepareDataInfo(doc); final long lastUpdateTimestamp = new Date().getTime(); @@ -103,17 +107,13 @@ public abstract class AbstractMdRecordToOafMapper { } } - private KeyValue getProvenanceDatasource(Document doc, String xpathId, String xpathName) { + private KeyValue getProvenanceDatasource(final Document doc, final String xpathId, final String xpathName) { final String dsId = doc.valueOf(xpathId); final String dsName = doc.valueOf(xpathName); - if (StringUtils.isBlank(dsId) | StringUtils.isBlank(dsName)) { - return null; - } + if (StringUtils.isBlank(dsId) | StringUtils.isBlank(dsName)) { return null; } - return keyValue( - createOpenaireId(10, dsId, true), - dsName); + return keyValue(createOpenaireId(10, dsId, true), dsName); } protected List createOafs( @@ -127,47 +127,47 @@ public abstract class AbstractMdRecordToOafMapper { final List oafs = new ArrayList<>(); switch (type.toLowerCase()) { - case "publication": - final Publication p = new Publication(); - populateResultFields(p, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp); - p.setResulttype(PUBLICATION_DEFAULT_RESULTTYPE); - p.setJournal(prepareJournal(doc, info)); - oafs.add(p); - break; - case "dataset": - final Dataset d = new Dataset(); - populateResultFields(d, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp); - d.setResulttype(DATASET_DEFAULT_RESULTTYPE); - d.setStoragedate(prepareDatasetStorageDate(doc, info)); - d.setDevice(prepareDatasetDevice(doc, info)); - d.setSize(prepareDatasetSize(doc, info)); - d.setVersion(prepareDatasetVersion(doc, info)); - d.setLastmetadataupdate(prepareDatasetLastMetadataUpdate(doc, info)); - d.setMetadataversionnumber(prepareDatasetMetadataVersionNumber(doc, info)); - d.setGeolocation(prepareDatasetGeoLocations(doc, info)); - oafs.add(d); - break; - case "software": - final Software s = new Software(); - populateResultFields(s, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp); - s.setResulttype(SOFTWARE_DEFAULT_RESULTTYPE); - s.setDocumentationUrl(prepareSoftwareDocumentationUrls(doc, info)); - s.setLicense(prepareSoftwareLicenses(doc, info)); - s.setCodeRepositoryUrl(prepareSoftwareCodeRepositoryUrl(doc, info)); - s.setProgrammingLanguage(prepareSoftwareProgrammingLanguage(doc, info)); - oafs.add(s); - break; - case "": - case "otherresearchproducts": - default: - final OtherResearchProduct o = new OtherResearchProduct(); - populateResultFields(o, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp); - o.setResulttype(ORP_DEFAULT_RESULTTYPE); - o.setContactperson(prepareOtherResearchProductContactPersons(doc, info)); - o.setContactgroup(prepareOtherResearchProductContactGroups(doc, info)); - o.setTool(prepareOtherResearchProductTools(doc, info)); - oafs.add(o); - break; + case "publication": + final Publication p = new Publication(); + populateResultFields(p, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp); + p.setResulttype(PUBLICATION_DEFAULT_RESULTTYPE); + p.setJournal(prepareJournal(doc, info)); + oafs.add(p); + break; + case "dataset": + final Dataset d = new Dataset(); + populateResultFields(d, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp); + d.setResulttype(DATASET_DEFAULT_RESULTTYPE); + d.setStoragedate(prepareDatasetStorageDate(doc, info)); + d.setDevice(prepareDatasetDevice(doc, info)); + d.setSize(prepareDatasetSize(doc, info)); + d.setVersion(prepareDatasetVersion(doc, info)); + d.setLastmetadataupdate(prepareDatasetLastMetadataUpdate(doc, info)); + d.setMetadataversionnumber(prepareDatasetMetadataVersionNumber(doc, info)); + d.setGeolocation(prepareDatasetGeoLocations(doc, info)); + oafs.add(d); + break; + case "software": + final Software s = new Software(); + populateResultFields(s, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp); + s.setResulttype(SOFTWARE_DEFAULT_RESULTTYPE); + s.setDocumentationUrl(prepareSoftwareDocumentationUrls(doc, info)); + s.setLicense(prepareSoftwareLicenses(doc, info)); + s.setCodeRepositoryUrl(prepareSoftwareCodeRepositoryUrl(doc, info)); + s.setProgrammingLanguage(prepareSoftwareProgrammingLanguage(doc, info)); + oafs.add(s); + break; + case "": + case "otherresearchproducts": + default: + final OtherResearchProduct o = new OtherResearchProduct(); + populateResultFields(o, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp); + o.setResulttype(ORP_DEFAULT_RESULTTYPE); + o.setContactperson(prepareOtherResearchProductContactPersons(doc, info)); + o.setContactgroup(prepareOtherResearchProductContactGroups(doc, info)); + o.setTool(prepareOtherResearchProductTools(doc, info)); + oafs.add(o); + break; } if (!oafs.isEmpty()) { @@ -196,23 +196,23 @@ public abstract class AbstractMdRecordToOafMapper { final String projectId = createOpenaireId(40, originalId, true); res - .add( - getRelation( - docId, projectId, RESULT_PROJECT, OUTCOME, IS_PRODUCED_BY, collectedFrom, info, - lastUpdateTimestamp)); + .add(getRelation(docId, projectId, RESULT_PROJECT, OUTCOME, IS_PRODUCED_BY, collectedFrom, info, lastUpdateTimestamp)); res - .add( - getRelation( - projectId, docId, RESULT_PROJECT, OUTCOME, PRODUCES, collectedFrom, info, - lastUpdateTimestamp)); + .add(getRelation(projectId, docId, RESULT_PROJECT, OUTCOME, PRODUCES, collectedFrom, info, lastUpdateTimestamp)); } } return res; } - protected Relation getRelation(String source, String target, String relType, String subRelType, String relClass, - KeyValue collectedFrom, DataInfo info, long lastUpdateTimestamp) { + protected Relation getRelation(final String source, + final String target, + final String relType, + final String subRelType, + final String relClass, + final KeyValue collectedFrom, + final DataInfo info, + final long lastUpdateTimestamp) { final Relation rel = new Relation(); rel.setRelType(relType); rel.setSubRelType(subRelType); @@ -244,9 +244,7 @@ public abstract class AbstractMdRecordToOafMapper { r.setOriginalId(Arrays.asList(doc.valueOf("//dri:objIdentifier"))); r.setCollectedfrom(Arrays.asList(collectedFrom)); r - .setPid( - prepareListStructProps( - doc, "//oaf:identifier", "@identifierType", "dnet:pid_types", "dnet:pid_types", info)); + .setPid(prepareListStructProps(doc, "//oaf:identifier", "@identifierType", "dnet:pid_types", "dnet:pid_types", info)); r.setDateofcollection(doc.valueOf("//dr:dateOfCollection")); r.setDateoftransformation(doc.valueOf("//dr:dateOfTransformation")); r.setExtraInfo(new ArrayList<>()); // NOT PRESENT IN MDSTORES @@ -289,7 +287,10 @@ public abstract class AbstractMdRecordToOafMapper { protected abstract Qualifier prepareResourceType(Document doc, DataInfo info); protected abstract List prepareInstances( - Document doc, DataInfo info, KeyValue collectedfrom, KeyValue hostedby); + Document doc, + DataInfo info, + KeyValue collectedfrom, + KeyValue hostedby); protected abstract List> prepareSources(Document doc, DataInfo info); @@ -314,13 +315,16 @@ public abstract class AbstractMdRecordToOafMapper { protected abstract List prepareAuthors(Document doc, DataInfo info); protected abstract List> prepareOtherResearchProductTools( - Document doc, DataInfo info); + Document doc, + DataInfo info); protected abstract List> prepareOtherResearchProductContactGroups( - Document doc, DataInfo info); + Document doc, + DataInfo info); protected abstract List> prepareOtherResearchProductContactPersons( - Document doc, DataInfo info); + Document doc, + DataInfo info); protected abstract Qualifier prepareSoftwareProgrammingLanguage(Document doc, DataInfo info); @@ -329,7 +333,8 @@ public abstract class AbstractMdRecordToOafMapper { protected abstract List prepareSoftwareLicenses(Document doc, DataInfo info); protected abstract List> prepareSoftwareDocumentationUrls( - Document doc, DataInfo info); + Document doc, + DataInfo info); protected abstract List prepareDatasetGeoLocations(Document doc, DataInfo info); @@ -357,27 +362,16 @@ public abstract class AbstractMdRecordToOafMapper { final String sp = n.valueOf("@sp"); final String vol = n.valueOf("@vol"); final String edition = n.valueOf("@edition"); - if (StringUtils.isNotBlank(name)) { - return journal( - name, - issnPrinted, - issnOnline, - issnLinking, - ep, - iss, - sp, - vol, - edition, - null, - null, - info); - } + if (StringUtils.isNotBlank(name)) { return journal(name, issnPrinted, issnOnline, issnLinking, ep, iss, sp, vol, edition, null, null, info); } } return null; } protected Qualifier prepareQualifier( - final Node node, final String xpath, final String schemeId, final String schemeName) { + final Node node, + final String xpath, + final String schemeId, + final String schemeName) { final String classId = node.valueOf(xpath); final String className = code2name.get(classId); return qualifier(classId, className, schemeId, schemeName); @@ -401,7 +395,10 @@ public abstract class AbstractMdRecordToOafMapper { } protected List prepareListStructProps( - final Node node, final String xpath, final Qualifier qualifier, final DataInfo info) { + final Node node, + final String xpath, + final Qualifier qualifier, + final DataInfo info) { final List res = new ArrayList<>(); for (final Object o : node.selectNodes(xpath)) { final Node n = (Node) o; @@ -411,19 +408,14 @@ public abstract class AbstractMdRecordToOafMapper { } protected List prepareListStructProps( - final Node node, final String xpath, final DataInfo info) { + final Node node, + final String xpath, + final DataInfo info) { final List res = new ArrayList<>(); for (final Object o : node.selectNodes(xpath)) { final Node n = (Node) o; res - .add( - structuredProperty( - n.getText(), - n.valueOf("@classid"), - n.valueOf("@classname"), - n.valueOf("@schemeid"), - n.valueOf("@schemename"), - info)); + .add(structuredProperty(n.getText(), n.valueOf("@classid"), n.valueOf("@classname"), n.valueOf("@schemeid"), n.valueOf("@schemename"), info)); } return res; } @@ -431,9 +423,7 @@ public abstract class AbstractMdRecordToOafMapper { protected OAIProvenance prepareOAIprovenance(final Document doc) { final Node n = doc.selectSingleNode("//*[local-name()='provenance']/*[local-name()='originDescription']"); - if (n == null) { - return null; - } + if (n == null) { return null; } final String identifier = n.valueOf("./*[local-name()='identifier']"); final String baseURL = n.valueOf("./*[local-name()='baseURL']"); @@ -448,10 +438,7 @@ public abstract class AbstractMdRecordToOafMapper { protected DataInfo prepareDataInfo(final Document doc) { final Node n = doc.selectSingleNode("//oaf:datainfo"); - if (n == null) { - return dataInfo( - false, null, false, false, REPOSITORY_PROVENANCE_ACTIONS, "0.9"); - } + if (n == null) { return dataInfo(false, null, false, false, REPOSITORY_PROVENANCE_ACTIONS, "0.9"); } final String paClassId = n.valueOf("./oaf:provenanceaction/@classid"); final String paClassName = n.valueOf("./oaf:provenanceaction/@classname"); @@ -463,13 +450,7 @@ public abstract class AbstractMdRecordToOafMapper { final Boolean inferred = Boolean.parseBoolean(n.valueOf("./oaf:inferred")); final String trust = n.valueOf("./oaf:trust"); - return dataInfo( - deletedbyinference, - inferenceprovenance, - inferred, - false, - qualifier(paClassId, paClassName, paSchemeId, paSchemeName), - trust); + return dataInfo(deletedbyinference, inferenceprovenance, inferred, false, qualifier(paClassId, paClassName, paSchemeId, paSchemeName), trust); } protected Field prepareField(final Node node, final String xpath, final DataInfo info) { @@ -477,7 +458,9 @@ public abstract class AbstractMdRecordToOafMapper { } protected List> prepareListFields( - final Node node, final String xpath, final DataInfo info) { + final Node node, + final String xpath, + final DataInfo info) { return listFields(info, prepareListString(node, xpath)); } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java index ed09016da8..24a8b45270 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java @@ -1,10 +1,19 @@ package eu.dnetlib.dhp.oa.graph.raw; -import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.*; -import static eu.dnetlib.dhp.schema.common.ModelConstants.*; +import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.createOpenaireId; +import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.field; +import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.structuredProperty; +import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_ACCESS_MODES; +import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_LANGUAGES; +import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_PUBLICATION_RESOURCE; +import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_RELATED_TO; +import static eu.dnetlib.dhp.schema.common.ModelConstants.PUBLICATION_DATASET; +import static eu.dnetlib.dhp.schema.common.ModelConstants.RESULT_RESULT; -import java.util.*; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; import java.util.stream.Collectors; import org.apache.commons.lang3.StringUtils; @@ -15,8 +24,15 @@ import org.dom4j.Node; import com.google.common.collect.Lists; import eu.dnetlib.dhp.oa.graph.raw.common.PacePerson; -import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.Author; +import eu.dnetlib.dhp.schema.oaf.DataInfo; +import eu.dnetlib.dhp.schema.oaf.Field; +import eu.dnetlib.dhp.schema.oaf.GeoLocation; +import eu.dnetlib.dhp.schema.oaf.Instance; +import eu.dnetlib.dhp.schema.oaf.KeyValue; +import eu.dnetlib.dhp.schema.oaf.Oaf; +import eu.dnetlib.dhp.schema.oaf.Qualifier; +import eu.dnetlib.dhp.schema.oaf.StructuredProperty; public class OafToOafMapper extends AbstractMdRecordToOafMapper { @@ -39,14 +55,22 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper { author.setSurname(p.getNormalisedSurname()); } - final String pid = e.attributeValue("nameIdentifier"); - final String pidType = e.attributeValue("nameIdentifierScheme"); + final String pid = e.valueOf("./@nameIdentifier"); + final String type = e.valueOf("./@nameIdentifierScheme") + .trim() + .toUpperCase() + .replaceAll(" ", "") + .replaceAll("_", ""); author.setPid(new ArrayList<>()); - if (StringUtils.isNotBlank(pid) && StringUtils.isNotBlank(pidType)) { - author - .getPid() - .add(structuredProperty(pid, qualifier(pidType, pidType, DNET_PID_TYPES, DNET_PID_TYPES), info)); + + if (StringUtils.isNotBlank(pid)) { + if (type.startsWith("ORCID")) { + final String cleanedId = pid.replaceAll("http://orcid.org/", "").replaceAll("https://orcid.org/", ""); + author.getPid().add(structuredProperty(cleanedId, ORCID_PID_TYPE, info)); + } else if (type.startsWith("MAGID")) { + author.getPid().add(structuredProperty(pid, MAG_PID_TYPE, info)); + } } res.add(author); @@ -103,38 +127,29 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper { final Instance instance = new Instance(); instance - .setInstancetype( - prepareQualifier( - doc, - "//dr:CobjCategory", - DNET_PUBLICATION_RESOURCE, - DNET_PUBLICATION_RESOURCE)); + .setInstancetype(prepareQualifier(doc, "//dr:CobjCategory", DNET_PUBLICATION_RESOURCE, DNET_PUBLICATION_RESOURCE)); instance.setCollectedfrom(collectedfrom); instance.setHostedby(hostedby); instance.setDateofacceptance(field(doc.valueOf("//oaf:dateAccepted"), info)); instance.setDistributionlocation(doc.valueOf("//oaf:distributionlocation")); instance - .setAccessright( - prepareQualifier(doc, "//oaf:accessrights", DNET_ACCESS_MODES, DNET_ACCESS_MODES)); + .setAccessright(prepareQualifier(doc, "//oaf:accessrights", DNET_ACCESS_MODES, DNET_ACCESS_MODES)); instance.setLicense(field(doc.valueOf("//oaf:license"), info)); instance.setRefereed(field(doc.valueOf("//oaf:refereed"), info)); instance - .setProcessingchargeamount( - field(doc.valueOf("//oaf:processingchargeamount"), info)); + .setProcessingchargeamount(field(doc.valueOf("//oaf:processingchargeamount"), info)); instance - .setProcessingchargecurrency( - field(doc.valueOf("//oaf:processingchargeamount/@currency"), info)); + .setProcessingchargecurrency(field(doc.valueOf("//oaf:processingchargeamount/@currency"), info)); - List nodes = Lists.newArrayList(doc.selectNodes("//dc:identifier")); + final List nodes = Lists.newArrayList(doc.selectNodes("//dc:identifier")); instance - .setUrl( - nodes - .stream() - .filter(n -> StringUtils.isNotBlank(n.getText())) - .map(n -> n.getText().trim()) - .filter(u -> u.startsWith("http")) - .distinct() - .collect(Collectors.toCollection(ArrayList::new))); + .setUrl(nodes + .stream() + .filter(n -> StringUtils.isNotBlank(n.getText())) + .map(n -> n.getText().trim()) + .filter(u -> u.startsWith("http")) + .distinct() + .collect(Collectors.toCollection(ArrayList::new))); return Lists.newArrayList(instance); } @@ -158,19 +173,22 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper { @Override protected Field prepareSoftwareCodeRepositoryUrl( - final Document doc, final DataInfo info) { + final Document doc, + final DataInfo info) { return null; // NOT PRESENT IN OAF } @Override protected List prepareSoftwareLicenses( - final Document doc, final DataInfo info) { + final Document doc, + final DataInfo info) { return new ArrayList<>(); // NOT PRESENT IN OAF } @Override protected List> prepareSoftwareDocumentationUrls( - final Document doc, final DataInfo info) { + final Document doc, + final DataInfo info) { return new ArrayList<>(); // NOT PRESENT IN OAF } @@ -182,13 +200,15 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper { @Override protected Field prepareDatasetMetadataVersionNumber( - final Document doc, final DataInfo info) { + final Document doc, + final DataInfo info) { return null; // NOT PRESENT IN OAF } @Override protected Field prepareDatasetLastMetadataUpdate( - final Document doc, final DataInfo info) { + final Document doc, + final DataInfo info) { return null; // NOT PRESENT IN OAF } @@ -216,19 +236,22 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper { @Override protected List> prepareOtherResearchProductTools( - final Document doc, final DataInfo info) { + final Document doc, + final DataInfo info) { return new ArrayList<>(); // NOT PRESENT IN OAF } @Override protected List> prepareOtherResearchProductContactGroups( - final Document doc, final DataInfo info) { + final Document doc, + final DataInfo info) { return new ArrayList<>(); // NOT PRESENT IN OAF } @Override protected List> prepareOtherResearchProductContactPersons( - final Document doc, final DataInfo info) { + final Document doc, + final DataInfo info) { return new ArrayList<>(); // NOT PRESENT IN OAF } @@ -251,15 +274,9 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper { final String otherId = createOpenaireId(50, originalId, false); res - .add( - getRelation( - docId, otherId, RESULT_RESULT, PUBLICATION_DATASET, IS_RELATED_TO, collectedFrom, info, - lastUpdateTimestamp)); + .add(getRelation(docId, otherId, RESULT_RESULT, PUBLICATION_DATASET, IS_RELATED_TO, collectedFrom, info, lastUpdateTimestamp)); res - .add( - getRelation( - otherId, docId, RESULT_RESULT, PUBLICATION_DATASET, IS_RELATED_TO, collectedFrom, info, - lastUpdateTimestamp)); + .add(getRelation(otherId, docId, RESULT_RESULT, PUBLICATION_DATASET, IS_RELATED_TO, collectedFrom, info, lastUpdateTimestamp)); } } return res; diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java index d51433a182..e7edfb2487 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java @@ -3,13 +3,11 @@ package eu.dnetlib.dhp.oa.graph.raw; import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.createOpenaireId; import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.field; -import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.qualifier; import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.structuredProperty; import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_ACCESS_MODES; import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_DATA_CITE_DATE; import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_DATA_CITE_RESOURCE; import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_LANGUAGES; -import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_PID_TYPES; import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_PUBLICATION_RESOURCE; import static eu.dnetlib.dhp.schema.common.ModelConstants.HAS_PARTS; import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_PART_OF; @@ -45,9 +43,6 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { public static final String HTTP_DX_DOI_PREIFX = "http://dx.doi.org/"; - public static final Qualifier ORCID_PID_TYPE = qualifier("ORCID", "Open Researcher and Contributor ID", DNET_PID_TYPES, DNET_PID_TYPES); - public static final Qualifier MAG_PID_TYPE = qualifier("MAGIdentifier", "Microsoft Academic Graph Identifier", DNET_PID_TYPES, DNET_PID_TYPES); - public OdfToOafMapper(final Map code2name) { super(code2name); } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java index 6ff76e839a..631e7235ec 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java @@ -75,7 +75,7 @@ public class MappersTest { .get(); assertEquals("0000-0001-6651-1178", pid.getValue()); assertEquals("ORCID", pid.getQualifier().getClassid()); - assertEquals("ORCID", pid.getQualifier().getClassname()); + assertEquals("Open Researcher and Contributor ID", pid.getQualifier().getClassname()); assertEquals(ModelConstants.DNET_PID_TYPES, pid.getQualifier().getSchemeid()); assertEquals(ModelConstants.DNET_PID_TYPES, pid.getQualifier().getSchemename()); assertEquals("Votsi,Nefta", author.get().getFullname()); From 3cf2796ac6a945fe63377b3b5c062bed5af21014 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 22 May 2020 12:34:00 +0200 Subject: [PATCH 11/37] code formatting --- .../eu/dnetlib/dhp/oa/dedup/AuthorMerger.java | 259 +++++++++--------- .../dhp/oa/dedup/DedupRecordFactory.java | 8 +- .../dhp/oa/dedup/EntityMergerTest.java | 69 ++--- .../raw/AbstractMdRecordToOafMapper.java | 141 ++++++---- .../dhp/oa/graph/raw/OafToOafMapper.java | 35 ++- .../dhp/oa/graph/raw/OdfToOafMapper.java | 47 +++- 6 files changed, 315 insertions(+), 244 deletions(-) diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/AuthorMerger.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/AuthorMerger.java index 108f4a4bec..43df19f8a4 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/AuthorMerger.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/AuthorMerger.java @@ -1,159 +1,164 @@ -package eu.dnetlib.dhp.oa.dedup; -import com.wcohen.ss.JaroWinkler; -import eu.dnetlib.dhp.schema.oaf.Author; -import eu.dnetlib.dhp.schema.oaf.StructuredProperty; -import eu.dnetlib.pace.model.Person; -import org.apache.commons.lang3.StringUtils; -import scala.Tuple2; +package eu.dnetlib.dhp.oa.dedup; import java.text.Normalizer; import java.util.*; import java.util.stream.Collectors; +import org.apache.commons.lang3.StringUtils; + +import com.wcohen.ss.JaroWinkler; + +import eu.dnetlib.dhp.schema.oaf.Author; +import eu.dnetlib.dhp.schema.oaf.StructuredProperty; +import eu.dnetlib.pace.model.Person; +import scala.Tuple2; + public class AuthorMerger { - private static final Double THRESHOLD = 0.95; + private static final Double THRESHOLD = 0.95; - public static List merge(List> authors){ + public static List merge(List> authors) { - authors.sort(new Comparator>() { - @Override - public int compare(List o1, List o2) { - return -Integer.compare(countAuthorsPids(o1), countAuthorsPids(o2)); - } - }); + authors.sort(new Comparator>() { + @Override + public int compare(List o1, List o2) { + return -Integer.compare(countAuthorsPids(o1), countAuthorsPids(o2)); + } + }); - List author = new ArrayList<>(); + List author = new ArrayList<>(); - for(List a : authors){ - author = mergeAuthor(author, a); - } + for (List a : authors) { + author = mergeAuthor(author, a); + } - return author; + return author; - } + } - public static List mergeAuthor(final List a, final List b) { - int pa = countAuthorsPids(a); - int pb = countAuthorsPids(b); - List base, enrich; - int sa = authorsSize(a); - int sb = authorsSize(b); + public static List mergeAuthor(final List a, final List b) { + int pa = countAuthorsPids(a); + int pb = countAuthorsPids(b); + List base, enrich; + int sa = authorsSize(a); + int sb = authorsSize(b); - if (pa == pb) { - base = sa > sb ? a : b; - enrich = sa > sb ? b : a; - } else { - base = pa > pb ? a : b; - enrich = pa > pb ? b : a; - } - enrichPidFromList(base, enrich); - return base; - } + if (pa == pb) { + base = sa > sb ? a : b; + enrich = sa > sb ? b : a; + } else { + base = pa > pb ? a : b; + enrich = pa > pb ? b : a; + } + enrichPidFromList(base, enrich); + return base; + } - private static void enrichPidFromList(List base, List enrich) { - if (base == null || enrich == null) - return; - final Map basePidAuthorMap = base - .stream() - .filter(a -> a.getPid() != null && a.getPid().size() > 0) - .flatMap( - a -> a - .getPid() - .stream() - .map(p -> new Tuple2<>(pidToComparableString(p), a))) - .collect(Collectors.toMap(Tuple2::_1, Tuple2::_2, (x1, x2) -> x1)); + private static void enrichPidFromList(List base, List enrich) { + if (base == null || enrich == null) + return; + final Map basePidAuthorMap = base + .stream() + .filter(a -> a.getPid() != null && a.getPid().size() > 0) + .flatMap( + a -> a + .getPid() + .stream() + .map(p -> new Tuple2<>(pidToComparableString(p), a))) + .collect(Collectors.toMap(Tuple2::_1, Tuple2::_2, (x1, x2) -> x1)); - final List> pidToEnrich = enrich - .stream() - .filter(a -> a.getPid() != null && a.getPid().size() > 0) - .flatMap( - a -> a - .getPid() - .stream() - .filter(p -> !basePidAuthorMap.containsKey(pidToComparableString(p))) - .map(p -> new Tuple2<>(p, a))) - .collect(Collectors.toList()); + final List> pidToEnrich = enrich + .stream() + .filter(a -> a.getPid() != null && a.getPid().size() > 0) + .flatMap( + a -> a + .getPid() + .stream() + .filter(p -> !basePidAuthorMap.containsKey(pidToComparableString(p))) + .map(p -> new Tuple2<>(p, a))) + .collect(Collectors.toList()); - pidToEnrich - .forEach( - a -> { - Optional> simAuthor = base - .stream() - .map(ba -> new Tuple2<>(sim(ba, a._2()), ba)) - .max(Comparator.comparing(Tuple2::_1)); - if (simAuthor.isPresent() && simAuthor.get()._1() > THRESHOLD) { - Author r = simAuthor.get()._2(); - if (r.getPid() == null) { - r.setPid(new ArrayList<>()); - } - r.getPid().add(a._1()); - } - }); - } + pidToEnrich + .forEach( + a -> { + Optional> simAuthor = base + .stream() + .map(ba -> new Tuple2<>(sim(ba, a._2()), ba)) + .max(Comparator.comparing(Tuple2::_1)); + if (simAuthor.isPresent() && simAuthor.get()._1() > THRESHOLD) { + Author r = simAuthor.get()._2(); + if (r.getPid() == null) { + r.setPid(new ArrayList<>()); + } + r.getPid().add(a._1()); + } + }); + } - public static String pidToComparableString(StructuredProperty pid){ - return (pid.getQualifier()!=null? pid.getQualifier().getClassid()!=null?pid.getQualifier().getClassid().toLowerCase():"":"") + (pid.getValue()!=null? pid.getValue().toLowerCase():""); - } + public static String pidToComparableString(StructuredProperty pid) { + return (pid.getQualifier() != null + ? pid.getQualifier().getClassid() != null ? pid.getQualifier().getClassid().toLowerCase() : "" + : "") + (pid.getValue() != null ? pid.getValue().toLowerCase() : ""); + } - public static int countAuthorsPids(List authors) { - if (authors == null) - return 0; + public static int countAuthorsPids(List authors) { + if (authors == null) + return 0; - return (int) authors.stream().filter(AuthorMerger::hasPid).count(); - } + return (int) authors.stream().filter(AuthorMerger::hasPid).count(); + } - private static int authorsSize(List authors) { - if (authors == null) - return 0; - return authors.size(); - } + private static int authorsSize(List authors) { + if (authors == null) + return 0; + return authors.size(); + } - private static Double sim(Author a, Author b) { + private static Double sim(Author a, Author b) { - final Person pa = parse(a); - final Person pb = parse(b); + final Person pa = parse(a); + final Person pb = parse(b); - if (pa.isAccurate() & pb.isAccurate()) { - return new JaroWinkler() - .score(normalize(pa.getSurnameString()), normalize(pb.getSurnameString())); - } else { - return new JaroWinkler() - .score(normalize(pa.getNormalisedFullname()), normalize(pb.getNormalisedFullname())); - } - } + if (pa.isAccurate() & pb.isAccurate()) { + return new JaroWinkler() + .score(normalize(pa.getSurnameString()), normalize(pb.getSurnameString())); + } else { + return new JaroWinkler() + .score(normalize(pa.getNormalisedFullname()), normalize(pb.getNormalisedFullname())); + } + } - private static boolean hasPid(Author a) { - if (a == null || a.getPid() == null || a.getPid().size() == 0) - return false; - return a.getPid().stream().anyMatch(p -> p != null && StringUtils.isNotBlank(p.getValue())); - } + private static boolean hasPid(Author a) { + if (a == null || a.getPid() == null || a.getPid().size() == 0) + return false; + return a.getPid().stream().anyMatch(p -> p != null && StringUtils.isNotBlank(p.getValue())); + } - private static Person parse(Author author) { - if (StringUtils.isNotBlank(author.getSurname())) { - return new Person(author.getSurname() + ", " + author.getName(), false); - } else { - return new Person(author.getFullname(), false); - } - } + private static Person parse(Author author) { + if (StringUtils.isNotBlank(author.getSurname())) { + return new Person(author.getSurname() + ", " + author.getName(), false); + } else { + return new Person(author.getFullname(), false); + } + } - private static String normalize(final String s) { - return nfd(s) - .toLowerCase() - // do not compact the regexes in a single expression, would cause StackOverflowError - // in case - // of large input strings - .replaceAll("(\\W)+", " ") - .replaceAll("(\\p{InCombiningDiacriticalMarks})+", " ") - .replaceAll("(\\p{Punct})+", " ") - .replaceAll("(\\d)+", " ") - .replaceAll("(\\n)+", " ") - .trim(); - } + private static String normalize(final String s) { + return nfd(s) + .toLowerCase() + // do not compact the regexes in a single expression, would cause StackOverflowError + // in case + // of large input strings + .replaceAll("(\\W)+", " ") + .replaceAll("(\\p{InCombiningDiacriticalMarks})+", " ") + .replaceAll("(\\p{Punct})+", " ") + .replaceAll("(\\d)+", " ") + .replaceAll("(\\n)+", " ") + .trim(); + } - private static String nfd(final String s) { - return Normalizer.normalize(s, Normalizer.Form.NFD); - } + private static String nfd(final String s) { + return Normalizer.normalize(s, Normalizer.Form.NFD); + } } diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java index eed783e536..8028d5a94f 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java @@ -1,3 +1,4 @@ + package eu.dnetlib.dhp.oa.dedup; import java.io.Serializable; @@ -73,7 +74,8 @@ public class DedupRecordFactory { } public static T entityMerger( - String id, Iterator> entities, long ts, DataInfo dataInfo, Class clazz) throws IllegalAccessException, InstantiationException { + String id, Iterator> entities, long ts, DataInfo dataInfo, Class clazz) + throws IllegalAccessException, InstantiationException { T entity = clazz.newInstance(); @@ -87,14 +89,14 @@ public class DedupRecordFactory { entity.mergeFrom(duplicate); if (ModelSupport.isSubClass(duplicate, Result.class)) { Result r1 = (Result) duplicate; - if (r1.getAuthor() != null && r1.getAuthor().size()>0) + if (r1.getAuthor() != null && r1.getAuthor().size() > 0) authors.add(r1.getAuthor()); if (r1.getDateofacceptance() != null) dates.add(r1.getDateofacceptance().getValue()); } }); - //set authors and date + // set authors and date if (ModelSupport.isSubClass(entity, Result.class)) { ((Result) entity).setDateofacceptance(DatePicker.pick(dates)); ((Result) entity).setAuthor(AuthorMerger.merge(authors)); diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/EntityMergerTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/EntityMergerTest.java index 526dd73bb0..0a3bf62ea8 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/EntityMergerTest.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/EntityMergerTest.java @@ -1,5 +1,8 @@ + package eu.dnetlib.dhp.oa.dedup; +import static org.junit.jupiter.api.Assertions.assertEquals; + import java.io.BufferedReader; import java.io.FileReader; import java.io.IOException; @@ -7,15 +10,13 @@ import java.io.Serializable; import java.nio.file.Paths; import java.util.*; -import eu.dnetlib.dhp.schema.oaf.*; -import eu.dnetlib.pace.util.MapDocumentUtil; import org.codehaus.jackson.map.ObjectMapper; import org.junit.jupiter.api.BeforeEach; - import org.junit.jupiter.api.Test; -import scala.Tuple2; -import static org.junit.jupiter.api.Assertions.assertEquals; +import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.pace.util.MapDocumentUtil; +import scala.Tuple2; public class EntityMergerTest implements Serializable { @@ -30,9 +31,9 @@ public class EntityMergerTest implements Serializable { public void setUp() throws Exception { testEntityBasePath = Paths - .get(SparkDedupTest.class.getResource("/eu/dnetlib/dhp/dedup/json").toURI()) - .toFile() - .getAbsolutePath(); + .get(SparkDedupTest.class.getResource("/eu/dnetlib/dhp/dedup/json").toURI()) + .toFile() + .getAbsolutePath(); publications = readSample(testEntityBasePath + "/publication_merge.json", Publication.class); @@ -45,7 +46,8 @@ public class EntityMergerTest implements Serializable { @Test public void publicationMergerTest() throws InstantiationException, IllegalAccessException { - Publication pub_merged = DedupRecordFactory.entityMerger(dedupId, publications.iterator(), 0, dataInfo, Publication.class); + Publication pub_merged = DedupRecordFactory + .entityMerger(dedupId, publications.iterator(), 0, dataInfo, Publication.class); assertEquals(dedupId, pub_merged.getId()); @@ -59,36 +61,36 @@ public class EntityMergerTest implements Serializable { assertEquals(pub_merged.getDateoftransformation(), pub_top.getDateoftransformation()); assertEquals(pub_merged.getOaiprovenance(), pub_top.getOaiprovenance()); assertEquals(pub_merged.getDateofcollection(), pub_top.getDateofcollection()); - assertEquals(pub_merged.getInstance().size(),3); + assertEquals(pub_merged.getInstance().size(), 3); assertEquals(pub_merged.getCountry().size(), 2); assertEquals(pub_merged.getSubject().size(), 0); assertEquals(pub_merged.getTitle().size(), 2); - assertEquals(pub_merged.getRelevantdate().size(),0); - assertEquals(pub_merged.getDescription().size(),0); - assertEquals(pub_merged.getSource().size(),0); - assertEquals(pub_merged.getFulltext().size(),0); - assertEquals(pub_merged.getFormat().size(),0); - assertEquals(pub_merged.getContributor().size(),0); - assertEquals(pub_merged.getCoverage().size(),0); - assertEquals(pub_merged.getContext().size(),0); - assertEquals(pub_merged.getExternalReference().size(),0); - assertEquals(pub_merged.getOriginalId().size(),3); - assertEquals(pub_merged.getCollectedfrom().size(),3); - assertEquals(pub_merged.getPid().size(),1); - assertEquals(pub_merged.getExtraInfo().size(),0); + assertEquals(pub_merged.getRelevantdate().size(), 0); + assertEquals(pub_merged.getDescription().size(), 0); + assertEquals(pub_merged.getSource().size(), 0); + assertEquals(pub_merged.getFulltext().size(), 0); + assertEquals(pub_merged.getFormat().size(), 0); + assertEquals(pub_merged.getContributor().size(), 0); + assertEquals(pub_merged.getCoverage().size(), 0); + assertEquals(pub_merged.getContext().size(), 0); + assertEquals(pub_merged.getExternalReference().size(), 0); + assertEquals(pub_merged.getOriginalId().size(), 3); + assertEquals(pub_merged.getCollectedfrom().size(), 3); + assertEquals(pub_merged.getPid().size(), 1); + assertEquals(pub_merged.getExtraInfo().size(), 0); - //verify datainfo + // verify datainfo assertEquals(pub_merged.getDataInfo(), dataInfo); - //verify datepicker + // verify datepicker assertEquals(pub_merged.getDateofacceptance().getValue(), "2018-09-30"); - //verify authors + // verify authors assertEquals(pub_merged.getAuthor().size(), 9); assertEquals(AuthorMerger.countAuthorsPids(pub_merged.getAuthor()), 4); } - public DataInfo setDI(){ + public DataInfo setDI() { DataInfo dataInfo = new DataInfo(); dataInfo.setTrust("0.9"); dataInfo.setDeletedbyinference(false); @@ -97,13 +99,13 @@ public class EntityMergerTest implements Serializable { return dataInfo; } - public Publication getTopPub(List> publications){ + public Publication getTopPub(List> publications) { Double maxTrust = 0.0; Publication maxPub = new Publication(); for (Tuple2 publication : publications) { Double pubTrust = Double.parseDouble(publication._2().getDataInfo().getTrust()); - if(pubTrust > maxTrust){ + if (pubTrust > maxTrust) { maxTrust = pubTrust; maxPub = publication._2(); } @@ -118,11 +120,11 @@ public class EntityMergerTest implements Serializable { reader = new BufferedReader(new FileReader(path)); String line = reader.readLine(); while (line != null) { - res.add( + res + .add( new Tuple2<>( - MapDocumentUtil.getJPathString("$.id", line), - new ObjectMapper().readValue(line, clazz)) - ); + MapDocumentUtil.getJPathString("$.id", line), + new ObjectMapper().readValue(line, clazz))); // read next line line = reader.readLine(); } @@ -134,5 +136,4 @@ public class EntityMergerTest implements Serializable { return res; } - } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java index 84b200c076..c4639eb44a 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java @@ -59,8 +59,10 @@ public abstract class AbstractMdRecordToOafMapper { protected static final String DATACITE_SCHEMA_KERNEL_4 = "http://datacite.org/schema/kernel-4"; protected static final String DATACITE_SCHEMA_KERNEL_3 = "http://datacite.org/schema/kernel-3"; - protected static final Qualifier ORCID_PID_TYPE = qualifier("ORCID", "Open Researcher and Contributor ID", DNET_PID_TYPES, DNET_PID_TYPES); - protected static final Qualifier MAG_PID_TYPE = qualifier("MAGIdentifier", "Microsoft Academic Graph Identifier", DNET_PID_TYPES, DNET_PID_TYPES); + protected static final Qualifier ORCID_PID_TYPE = qualifier( + "ORCID", "Open Researcher and Contributor ID", DNET_PID_TYPES, DNET_PID_TYPES); + protected static final Qualifier MAG_PID_TYPE = qualifier( + "MAGIdentifier", "Microsoft Academic Graph Identifier", DNET_PID_TYPES, DNET_PID_TYPES); protected static final Map nsContext = new HashMap<>(); @@ -74,7 +76,8 @@ public abstract class AbstractMdRecordToOafMapper { nsContext.put("datacite", DATACITE_SCHEMA_KERNEL_3); } - protected static final Qualifier MAIN_TITLE_QUALIFIER = qualifier("main title", "main title", "dnet:dataCite_title", "dnet:dataCite_title"); + protected static final Qualifier MAIN_TITLE_QUALIFIER = qualifier( + "main title", "main title", "dnet:dataCite_title", "dnet:dataCite_title"); protected AbstractMdRecordToOafMapper(final Map code2name) { this.code2name = code2name; @@ -88,15 +91,20 @@ public abstract class AbstractMdRecordToOafMapper { .parseText(xml.replaceAll(DATACITE_SCHEMA_KERNEL_4, DATACITE_SCHEMA_KERNEL_3)); final String type = doc.valueOf("//dr:CobjCategory/@type"); - final KeyValue collectedFrom = getProvenanceDatasource(doc, "//oaf:collectedFrom/@id", "//oaf:collectedFrom/@name"); + final KeyValue collectedFrom = getProvenanceDatasource( + doc, "//oaf:collectedFrom/@id", "//oaf:collectedFrom/@name"); - if (collectedFrom == null) { return null; } + if (collectedFrom == null) { + return null; + } final KeyValue hostedBy = StringUtils.isBlank(doc.valueOf("//oaf:hostedBy/@id")) ? collectedFrom : getProvenanceDatasource(doc, "//oaf:hostedBy/@id", "//oaf:hostedBy/@name"); - if (hostedBy == null) { return null; } + if (hostedBy == null) { + return null; + } final DataInfo info = prepareDataInfo(doc); final long lastUpdateTimestamp = new Date().getTime(); @@ -111,7 +119,9 @@ public abstract class AbstractMdRecordToOafMapper { final String dsId = doc.valueOf(xpathId); final String dsName = doc.valueOf(xpathName); - if (StringUtils.isBlank(dsId) | StringUtils.isBlank(dsName)) { return null; } + if (StringUtils.isBlank(dsId) | StringUtils.isBlank(dsName)) { + return null; + } return keyValue(createOpenaireId(10, dsId, true), dsName); } @@ -127,47 +137,47 @@ public abstract class AbstractMdRecordToOafMapper { final List oafs = new ArrayList<>(); switch (type.toLowerCase()) { - case "publication": - final Publication p = new Publication(); - populateResultFields(p, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp); - p.setResulttype(PUBLICATION_DEFAULT_RESULTTYPE); - p.setJournal(prepareJournal(doc, info)); - oafs.add(p); - break; - case "dataset": - final Dataset d = new Dataset(); - populateResultFields(d, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp); - d.setResulttype(DATASET_DEFAULT_RESULTTYPE); - d.setStoragedate(prepareDatasetStorageDate(doc, info)); - d.setDevice(prepareDatasetDevice(doc, info)); - d.setSize(prepareDatasetSize(doc, info)); - d.setVersion(prepareDatasetVersion(doc, info)); - d.setLastmetadataupdate(prepareDatasetLastMetadataUpdate(doc, info)); - d.setMetadataversionnumber(prepareDatasetMetadataVersionNumber(doc, info)); - d.setGeolocation(prepareDatasetGeoLocations(doc, info)); - oafs.add(d); - break; - case "software": - final Software s = new Software(); - populateResultFields(s, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp); - s.setResulttype(SOFTWARE_DEFAULT_RESULTTYPE); - s.setDocumentationUrl(prepareSoftwareDocumentationUrls(doc, info)); - s.setLicense(prepareSoftwareLicenses(doc, info)); - s.setCodeRepositoryUrl(prepareSoftwareCodeRepositoryUrl(doc, info)); - s.setProgrammingLanguage(prepareSoftwareProgrammingLanguage(doc, info)); - oafs.add(s); - break; - case "": - case "otherresearchproducts": - default: - final OtherResearchProduct o = new OtherResearchProduct(); - populateResultFields(o, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp); - o.setResulttype(ORP_DEFAULT_RESULTTYPE); - o.setContactperson(prepareOtherResearchProductContactPersons(doc, info)); - o.setContactgroup(prepareOtherResearchProductContactGroups(doc, info)); - o.setTool(prepareOtherResearchProductTools(doc, info)); - oafs.add(o); - break; + case "publication": + final Publication p = new Publication(); + populateResultFields(p, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp); + p.setResulttype(PUBLICATION_DEFAULT_RESULTTYPE); + p.setJournal(prepareJournal(doc, info)); + oafs.add(p); + break; + case "dataset": + final Dataset d = new Dataset(); + populateResultFields(d, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp); + d.setResulttype(DATASET_DEFAULT_RESULTTYPE); + d.setStoragedate(prepareDatasetStorageDate(doc, info)); + d.setDevice(prepareDatasetDevice(doc, info)); + d.setSize(prepareDatasetSize(doc, info)); + d.setVersion(prepareDatasetVersion(doc, info)); + d.setLastmetadataupdate(prepareDatasetLastMetadataUpdate(doc, info)); + d.setMetadataversionnumber(prepareDatasetMetadataVersionNumber(doc, info)); + d.setGeolocation(prepareDatasetGeoLocations(doc, info)); + oafs.add(d); + break; + case "software": + final Software s = new Software(); + populateResultFields(s, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp); + s.setResulttype(SOFTWARE_DEFAULT_RESULTTYPE); + s.setDocumentationUrl(prepareSoftwareDocumentationUrls(doc, info)); + s.setLicense(prepareSoftwareLicenses(doc, info)); + s.setCodeRepositoryUrl(prepareSoftwareCodeRepositoryUrl(doc, info)); + s.setProgrammingLanguage(prepareSoftwareProgrammingLanguage(doc, info)); + oafs.add(s); + break; + case "": + case "otherresearchproducts": + default: + final OtherResearchProduct o = new OtherResearchProduct(); + populateResultFields(o, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp); + o.setResulttype(ORP_DEFAULT_RESULTTYPE); + o.setContactperson(prepareOtherResearchProductContactPersons(doc, info)); + o.setContactgroup(prepareOtherResearchProductContactGroups(doc, info)); + o.setTool(prepareOtherResearchProductTools(doc, info)); + oafs.add(o); + break; } if (!oafs.isEmpty()) { @@ -196,9 +206,15 @@ public abstract class AbstractMdRecordToOafMapper { final String projectId = createOpenaireId(40, originalId, true); res - .add(getRelation(docId, projectId, RESULT_PROJECT, OUTCOME, IS_PRODUCED_BY, collectedFrom, info, lastUpdateTimestamp)); + .add( + getRelation( + docId, projectId, RESULT_PROJECT, OUTCOME, IS_PRODUCED_BY, collectedFrom, info, + lastUpdateTimestamp)); res - .add(getRelation(projectId, docId, RESULT_PROJECT, OUTCOME, PRODUCES, collectedFrom, info, lastUpdateTimestamp)); + .add( + getRelation( + projectId, docId, RESULT_PROJECT, OUTCOME, PRODUCES, collectedFrom, info, + lastUpdateTimestamp)); } } @@ -244,7 +260,9 @@ public abstract class AbstractMdRecordToOafMapper { r.setOriginalId(Arrays.asList(doc.valueOf("//dri:objIdentifier"))); r.setCollectedfrom(Arrays.asList(collectedFrom)); r - .setPid(prepareListStructProps(doc, "//oaf:identifier", "@identifierType", "dnet:pid_types", "dnet:pid_types", info)); + .setPid( + prepareListStructProps( + doc, "//oaf:identifier", "@identifierType", "dnet:pid_types", "dnet:pid_types", info)); r.setDateofcollection(doc.valueOf("//dr:dateOfCollection")); r.setDateoftransformation(doc.valueOf("//dr:dateOfTransformation")); r.setExtraInfo(new ArrayList<>()); // NOT PRESENT IN MDSTORES @@ -362,7 +380,9 @@ public abstract class AbstractMdRecordToOafMapper { final String sp = n.valueOf("@sp"); final String vol = n.valueOf("@vol"); final String edition = n.valueOf("@edition"); - if (StringUtils.isNotBlank(name)) { return journal(name, issnPrinted, issnOnline, issnLinking, ep, iss, sp, vol, edition, null, null, info); } + if (StringUtils.isNotBlank(name)) { + return journal(name, issnPrinted, issnOnline, issnLinking, ep, iss, sp, vol, edition, null, null, info); + } } return null; } @@ -415,7 +435,10 @@ public abstract class AbstractMdRecordToOafMapper { for (final Object o : node.selectNodes(xpath)) { final Node n = (Node) o; res - .add(structuredProperty(n.getText(), n.valueOf("@classid"), n.valueOf("@classname"), n.valueOf("@schemeid"), n.valueOf("@schemename"), info)); + .add( + structuredProperty( + n.getText(), n.valueOf("@classid"), n.valueOf("@classname"), n.valueOf("@schemeid"), + n.valueOf("@schemename"), info)); } return res; } @@ -423,7 +446,9 @@ public abstract class AbstractMdRecordToOafMapper { protected OAIProvenance prepareOAIprovenance(final Document doc) { final Node n = doc.selectSingleNode("//*[local-name()='provenance']/*[local-name()='originDescription']"); - if (n == null) { return null; } + if (n == null) { + return null; + } final String identifier = n.valueOf("./*[local-name()='identifier']"); final String baseURL = n.valueOf("./*[local-name()='baseURL']"); @@ -438,7 +463,9 @@ public abstract class AbstractMdRecordToOafMapper { protected DataInfo prepareDataInfo(final Document doc) { final Node n = doc.selectSingleNode("//oaf:datainfo"); - if (n == null) { return dataInfo(false, null, false, false, REPOSITORY_PROVENANCE_ACTIONS, "0.9"); } + if (n == null) { + return dataInfo(false, null, false, false, REPOSITORY_PROVENANCE_ACTIONS, "0.9"); + } final String paClassId = n.valueOf("./oaf:provenanceaction/@classid"); final String paClassName = n.valueOf("./oaf:provenanceaction/@classname"); @@ -450,7 +477,9 @@ public abstract class AbstractMdRecordToOafMapper { final Boolean inferred = Boolean.parseBoolean(n.valueOf("./oaf:inferred")); final String trust = n.valueOf("./oaf:trust"); - return dataInfo(deletedbyinference, inferenceprovenance, inferred, false, qualifier(paClassId, paClassName, paSchemeId, paSchemeName), trust); + return dataInfo( + deletedbyinference, inferenceprovenance, inferred, false, + qualifier(paClassId, paClassName, paSchemeId, paSchemeName), trust); } protected Field prepareField(final Node node, final String xpath, final DataInfo info) { diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java index 24a8b45270..af9fe7197e 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java @@ -56,7 +56,8 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper { } final String pid = e.valueOf("./@nameIdentifier"); - final String type = e.valueOf("./@nameIdentifierScheme") + final String type = e + .valueOf("./@nameIdentifierScheme") .trim() .toUpperCase() .replaceAll(" ", "") @@ -66,7 +67,9 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper { if (StringUtils.isNotBlank(pid)) { if (type.startsWith("ORCID")) { - final String cleanedId = pid.replaceAll("http://orcid.org/", "").replaceAll("https://orcid.org/", ""); + final String cleanedId = pid + .replaceAll("http://orcid.org/", "") + .replaceAll("https://orcid.org/", ""); author.getPid().add(structuredProperty(cleanedId, ORCID_PID_TYPE, info)); } else if (type.startsWith("MAGID")) { author.getPid().add(structuredProperty(pid, MAG_PID_TYPE, info)); @@ -127,7 +130,8 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper { final Instance instance = new Instance(); instance - .setInstancetype(prepareQualifier(doc, "//dr:CobjCategory", DNET_PUBLICATION_RESOURCE, DNET_PUBLICATION_RESOURCE)); + .setInstancetype( + prepareQualifier(doc, "//dr:CobjCategory", DNET_PUBLICATION_RESOURCE, DNET_PUBLICATION_RESOURCE)); instance.setCollectedfrom(collectedfrom); instance.setHostedby(hostedby); instance.setDateofacceptance(field(doc.valueOf("//oaf:dateAccepted"), info)); @@ -143,13 +147,14 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper { final List nodes = Lists.newArrayList(doc.selectNodes("//dc:identifier")); instance - .setUrl(nodes - .stream() - .filter(n -> StringUtils.isNotBlank(n.getText())) - .map(n -> n.getText().trim()) - .filter(u -> u.startsWith("http")) - .distinct() - .collect(Collectors.toCollection(ArrayList::new))); + .setUrl( + nodes + .stream() + .filter(n -> StringUtils.isNotBlank(n.getText())) + .map(n -> n.getText().trim()) + .filter(u -> u.startsWith("http")) + .distinct() + .collect(Collectors.toCollection(ArrayList::new))); return Lists.newArrayList(instance); } @@ -274,9 +279,15 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper { final String otherId = createOpenaireId(50, originalId, false); res - .add(getRelation(docId, otherId, RESULT_RESULT, PUBLICATION_DATASET, IS_RELATED_TO, collectedFrom, info, lastUpdateTimestamp)); + .add( + getRelation( + docId, otherId, RESULT_RESULT, PUBLICATION_DATASET, IS_RELATED_TO, collectedFrom, info, + lastUpdateTimestamp)); res - .add(getRelation(otherId, docId, RESULT_RESULT, PUBLICATION_DATASET, IS_RELATED_TO, collectedFrom, info, lastUpdateTimestamp)); + .add( + getRelation( + otherId, docId, RESULT_RESULT, PUBLICATION_DATASET, IS_RELATED_TO, collectedFrom, info, + lastUpdateTimestamp)); } } return res; diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java index e7edfb2487..9c74c4a938 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java @@ -94,7 +94,8 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { for (final Object o : n.selectNodes("./datacite:nameIdentifier")) { final String id = ((Node) o).getText(); - final String type = ((Node) o).valueOf("./@nameIdentifierScheme") + final String type = ((Node) o) + .valueOf("./@nameIdentifierScheme") .trim() .toUpperCase() .replaceAll(" ", "") @@ -119,7 +120,8 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { final Instance instance = new Instance(); instance - .setInstancetype(prepareQualifier(doc, "//dr:CobjCategory", DNET_PUBLICATION_RESOURCE, DNET_PUBLICATION_RESOURCE)); + .setInstancetype( + prepareQualifier(doc, "//dr:CobjCategory", DNET_PUBLICATION_RESOURCE, DNET_PUBLICATION_RESOURCE)); instance.setCollectedfrom(collectedfrom); instance.setHostedby(hostedby); instance.setDateofacceptance(field(doc.valueOf("//oaf:dateAccepted"), info)); @@ -168,7 +170,10 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { && !dateType.equalsIgnoreCase("Updated") && !dateType.equalsIgnoreCase("Available")) { res - .add(structuredProperty(((Node) o).getText(), "UNKNOWN", "UNKNOWN", DNET_DATA_CITE_DATE, DNET_DATA_CITE_DATE, info)); + .add( + structuredProperty( + ((Node) o).getText(), "UNKNOWN", "UNKNOWN", DNET_DATA_CITE_DATE, DNET_DATA_CITE_DATE, + info)); } } return res; @@ -220,14 +225,16 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { protected List> prepareOtherResearchProductContactGroups( final Document doc, final DataInfo info) { - return prepareListFields(doc, "//datacite:contributor[@contributorType='ContactGroup']/datacite:contributorName", info); + return prepareListFields( + doc, "//datacite:contributor[@contributorType='ContactGroup']/datacite:contributorName", info); } @Override protected List> prepareOtherResearchProductContactPersons( final Document doc, final DataInfo info) { - return prepareListFields(doc, "//datacite:contributor[@contributorType='ContactPerson']/datacite:contributorName", info); + return prepareListFields( + doc, "//datacite:contributor[@contributorType='ContactPerson']/datacite:contributorName", info); } @Override @@ -253,7 +260,8 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { protected List> prepareSoftwareDocumentationUrls( final Document doc, final DataInfo info) { - return prepareListFields(doc, "//datacite:relatedIdentifier[@relatedIdentifierType='URL' and @relationType='IsDocumentedBy']", info); + return prepareListFields( + doc, "//datacite:relatedIdentifier[@relatedIdentifierType='URL' and @relationType='IsDocumentedBy']", info); } // DATASETS @@ -327,16 +335,29 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { if (type.equalsIgnoreCase("IsSupplementTo")) { res - .add(getRelation(docId, otherId, RESULT_RESULT, SUPPLEMENT, IS_SUPPLEMENT_TO, collectedFrom, info, lastUpdateTimestamp)); + .add( + getRelation( + docId, otherId, RESULT_RESULT, SUPPLEMENT, IS_SUPPLEMENT_TO, collectedFrom, info, + lastUpdateTimestamp)); res - .add(getRelation(otherId, docId, RESULT_RESULT, SUPPLEMENT, IS_SUPPLEMENTED_BY, collectedFrom, info, lastUpdateTimestamp)); + .add( + getRelation( + otherId, docId, RESULT_RESULT, SUPPLEMENT, IS_SUPPLEMENTED_BY, collectedFrom, info, + lastUpdateTimestamp)); } else if (type.equals("IsPartOf")) { res - .add(getRelation(docId, otherId, RESULT_RESULT, PART, IS_PART_OF, collectedFrom, info, lastUpdateTimestamp)); + .add( + getRelation( + docId, otherId, RESULT_RESULT, PART, IS_PART_OF, collectedFrom, info, + lastUpdateTimestamp)); res - .add(getRelation(otherId, docId, RESULT_RESULT, PART, HAS_PARTS, collectedFrom, info, lastUpdateTimestamp)); - } else {} + .add( + getRelation( + otherId, docId, RESULT_RESULT, PART, HAS_PARTS, collectedFrom, info, + lastUpdateTimestamp)); + } else { + } } } return res; @@ -344,6 +365,8 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { @Override protected Qualifier prepareResourceType(final Document doc, final DataInfo info) { - return prepareQualifier(doc, "//*[local-name() = 'resource']//*[local-name() = 'resourceType']", DNET_DATA_CITE_RESOURCE, DNET_DATA_CITE_RESOURCE); + return prepareQualifier( + doc, "//*[local-name() = 'resource']//*[local-name() = 'resourceType']", DNET_DATA_CITE_RESOURCE, + DNET_DATA_CITE_RESOURCE); } } From 4308f311650bca96eab1ad5d3e09c87f56d97901 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 22 May 2020 13:13:01 +0200 Subject: [PATCH 12/37] added fix to make test run --- .../ResultToOrganizationJobTest.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/ResultToOrganizationJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/ResultToOrganizationJobTest.java index 435b766052..cfcccc5f0d 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/ResultToOrganizationJobTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/ResultToOrganizationJobTest.java @@ -99,6 +99,7 @@ public class ResultToOrganizationJobTest { .map(item -> OBJECT_MAPPER.readValue(item, Relation.class)); Assertions.assertEquals(0, tmp.count()); + FileUtils.deleteDirectory(workingDir.toFile()); } /** @@ -171,6 +172,7 @@ public class ResultToOrganizationJobTest { + "(target = '20|opendoar____::124266ebc4ece2934eb80edfda3f2091' " + "or target = '20|dedup_wf_001::5168917a6aeeea55269daeac1af2ecd2')") .count()); + FileUtils.deleteDirectory(workingDir.toFile()); } @Test @@ -266,5 +268,6 @@ public class ResultToOrganizationJobTest { "relclass = 'isAuthorInstitutionOf' and " + "substring(source, 1,2) = '20' and substring(target, 1, 2) = '50'") .count()); + FileUtils.deleteDirectory(workingDir.toFile()); } } From 8610ad5142c932a8eddb2deb09475fd7bce1f0d4 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 22 May 2020 15:32:55 +0200 Subject: [PATCH 13/37] added groupby id to fix multiple result with same id at join step --- .../PrepareResultCommunitySet.java | 24 +++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/PrepareResultCommunitySet.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/PrepareResultCommunitySet.java index 5574aad753..5d0b75a8e0 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/PrepareResultCommunitySet.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/PrepareResultCommunitySet.java @@ -7,6 +7,7 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; import java.util.*; import org.apache.commons.io.IOUtils; +import org.apache.hadoop.io.compress.GzipCodec; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.sql.*; @@ -19,6 +20,7 @@ import com.google.gson.Gson; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.Relation; +import scala.Tuple2; public class PrepareResultCommunitySet { @@ -93,10 +95,24 @@ public class PrepareResultCommunitySet { result_organizationset .map(mapResultCommunityFn(organizationMap), Encoders.bean(ResultCommunityList.class)) .filter(Objects::nonNull) - .write() - .mode(SaveMode.Overwrite) - .option("compression", "gzip") - .json(outputPath); + .toJavaRDD() + .mapToPair(value -> new Tuple2<>(value.getResultId(), value)) + .reduceByKey((a, b) -> { + ArrayList cl = a.getCommunityList(); + b.getCommunityList().stream().forEach(s -> { + if (!cl.contains(s)) { + cl.add(s); + } + }); + a.setCommunityList(cl); + return a; + }) + .map(value -> OBJECT_MAPPER.writeValueAsString(value._2())) + .saveAsTextFile(outputPath, GzipCodec.class); +// .write() +// .mode(SaveMode.Overwrite) +// .option("compression", "gzip") +// .json(outputPath); } private static MapFunction mapResultCommunityFn( From 29066a6b46f94903bdbb709c5bf32552a5be5343 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 22 May 2020 15:38:50 +0200 Subject: [PATCH 14/37] applied code cleanup --- .../dhp/bulktag/community/CommunityConfiguration.java | 2 +- .../java/eu/dnetlib/dhp/bulktag/criteria/VerbResolver.java | 2 +- .../SparkResultToProjectThroughSemRelJob.java | 6 +----- .../SparkResultToOrganizationFromIstRepoJob.java | 4 +--- 4 files changed, 4 insertions(+), 10 deletions(-) diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfiguration.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfiguration.java index 29ddde15f8..844fe29621 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfiguration.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfiguration.java @@ -131,7 +131,7 @@ public class CommunityConfiguration implements Serializable { p -> { if (p.getSnd() == null) return p.getFst(); - if (((SelectionConstraints) p.getSnd()).verifyCriteria(param)) + if (p.getSnd().verifyCriteria(param)) return p.getFst(); else return null; diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/VerbResolver.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/VerbResolver.java index 3d0db20632..f54a1cebab 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/VerbResolver.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/VerbResolver.java @@ -34,7 +34,7 @@ public class VerbResolver implements Serializable { .collect( Collectors .toMap( - value -> (String) ((ClassInfo) value) + value -> (String) value .getAnnotationInfo() .get(0) .getParameterValues() diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/SparkResultToProjectThroughSemRelJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/SparkResultToProjectThroughSemRelJob.java index 1f6264c186..17f6a057d8 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/SparkResultToProjectThroughSemRelJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/SparkResultToProjectThroughSemRelJob.java @@ -105,11 +105,7 @@ public class SparkResultToProjectThroughSemRelJob { .stream() .forEach( (p -> { - if (potential_update - .getProjectSet() - .contains(p)) { - potential_update.getProjectSet().remove(p); - } + potential_update.getProjectSet().remove(p); })); } String resId = potential_update.getResultId(); diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/SparkResultToOrganizationFromIstRepoJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/SparkResultToOrganizationFromIstRepoJob.java index 0c5e1d8be6..ff34bd42a7 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/SparkResultToOrganizationFromIstRepoJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/SparkResultToOrganizationFromIstRepoJob.java @@ -136,9 +136,7 @@ public class SparkResultToOrganizationFromIstRepoJob { .stream() .forEach( rId -> { - if (organization_list.contains(rId)) { - organization_list.remove(rId); - } + organization_list.remove(rId); }); } String resultId = potential_update.getResultId(); From eb606dc1e298f397b69464eb98b96fb5c9374aea Mon Sep 17 00:00:00 2001 From: "michele.artini" Date: Fri, 22 May 2020 17:17:41 +0200 Subject: [PATCH 15/37] partial implementation of events with rels --- .../broker/oa/GenerateEventsApplication.java | 46 ++++++------- .../EnrichMissingDatasetIsReferencedBy.java | 38 ----------- .../EnrichMissingDatasetIsRelatedTo.java | 38 ----------- .../EnrichMissingDatasetIsSupplementedBy.java | 38 ----------- .../EnrichMissingDatasetIsSupplementedTo.java | 38 ----------- .../EnrichMissingDatasetReferences.java | 38 ----------- ...nrichMissingPublicationIsReferencedBy.java | 42 ------------ .../EnrichMissingPublicationIsRelatedTo.java | 42 ------------ ...ichMissingPublicationIsSupplementedBy.java | 42 ------------ ...ichMissingPublicationIsSupplementedTo.java | 42 ------------ .../EnrichMissingPublicationReferences.java | 42 ------------ .../AbstractEnrichMissingDataset.java | 63 ++++++++++++++++++ .../EnrichMissingDatasetIsReferencedBy.java | 12 ++++ .../EnrichMissingDatasetIsRelatedTo.java | 12 ++++ .../EnrichMissingDatasetIsSupplementedBy.java | 12 ++++ .../EnrichMissingDatasetIsSupplementedTo.java | 12 ++++ .../EnrichMissingDatasetReferences.java | 12 ++++ .../EnrichMissingProject.java | 3 +- .../EnrichMoreProject.java | 3 +- .../AbstractEnrichMissingPublication.java | 64 +++++++++++++++++++ ...nrichMissingPublicationIsReferencedBy.java | 12 ++++ .../EnrichMissingPublicationIsRelatedTo.java | 12 ++++ ...ichMissingPublicationIsSupplementedBy.java | 12 ++++ ...ichMissingPublicationIsSupplementedTo.java | 12 ++++ .../EnrichMissingPublicationReferences.java | 12 ++++ .../{ => simple}/EnrichMissingAbstract.java | 3 +- .../EnrichMissingAuthorOrcid.java | 3 +- .../{ => simple}/EnrichMissingOpenAccess.java | 3 +- .../{ => simple}/EnrichMissingPid.java | 3 +- .../EnrichMissingPublicationDate.java | 3 +- .../{ => simple}/EnrichMissingSoftware.java | 3 +- .../{ => simple}/EnrichMissingSubject.java | 3 +- .../{ => simple}/EnrichMoreOpenAccess.java | 3 +- .../matchers/{ => simple}/EnrichMorePid.java | 3 +- .../{ => simple}/EnrichMoreSoftware.java | 3 +- .../{ => simple}/EnrichMoreSubject.java | 3 +- .../dhp/broker/oa/util/ConversionUtils.java | 13 ++++ 37 files changed, 309 insertions(+), 436 deletions(-) delete mode 100644 dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingDatasetIsReferencedBy.java delete mode 100644 dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingDatasetIsRelatedTo.java delete mode 100644 dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingDatasetIsSupplementedBy.java delete mode 100644 dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingDatasetIsSupplementedTo.java delete mode 100644 dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingDatasetReferences.java delete mode 100644 dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPublicationIsReferencedBy.java delete mode 100644 dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPublicationIsRelatedTo.java delete mode 100644 dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPublicationIsSupplementedBy.java delete mode 100644 dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPublicationIsSupplementedTo.java delete mode 100644 dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPublicationReferences.java create mode 100644 dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/AbstractEnrichMissingDataset.java create mode 100644 dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetIsReferencedBy.java create mode 100644 dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetIsRelatedTo.java create mode 100644 dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetIsSupplementedBy.java create mode 100644 dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetIsSupplementedTo.java create mode 100644 dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetReferences.java rename dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/{ => relatedProjects}/EnrichMissingProject.java (90%) rename dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/{ => relatedProjects}/EnrichMoreProject.java (90%) create mode 100644 dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/AbstractEnrichMissingPublication.java create mode 100644 dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationIsReferencedBy.java create mode 100644 dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationIsRelatedTo.java create mode 100644 dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationIsSupplementedBy.java create mode 100644 dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationIsSupplementedTo.java create mode 100644 dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationReferences.java rename dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/{ => simple}/EnrichMissingAbstract.java (90%) rename dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/{ => simple}/EnrichMissingAuthorOrcid.java (90%) rename dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/{ => simple}/EnrichMissingOpenAccess.java (93%) rename dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/{ => simple}/EnrichMissingPid.java (91%) rename dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/{ => simple}/EnrichMissingPublicationDate.java (90%) rename dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/{ => simple}/EnrichMissingSoftware.java (91%) rename dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/{ => simple}/EnrichMissingSubject.java (93%) rename dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/{ => simple}/EnrichMoreOpenAccess.java (93%) rename dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/{ => simple}/EnrichMorePid.java (92%) rename dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/{ => simple}/EnrichMoreSoftware.java (91%) rename dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/{ => simple}/EnrichMoreSubject.java (93%) diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/GenerateEventsApplication.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/GenerateEventsApplication.java index fa425a1819..d5e577972d 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/GenerateEventsApplication.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/GenerateEventsApplication.java @@ -30,30 +30,30 @@ import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.broker.model.Event; import eu.dnetlib.dhp.broker.model.EventFactory; -import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingAbstract; -import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingAuthorOrcid; -import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingDatasetIsReferencedBy; -import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingDatasetIsRelatedTo; -import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingDatasetIsSupplementedBy; -import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingDatasetIsSupplementedTo; -import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingDatasetReferences; -import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingOpenAccess; -import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingPid; -import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingProject; -import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingPublicationDate; -import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingPublicationIsReferencedBy; -import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingPublicationIsRelatedTo; -import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingPublicationIsSupplementedBy; -import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingPublicationIsSupplementedTo; -import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingPublicationReferences; -import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingSoftware; -import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingSubject; -import eu.dnetlib.dhp.broker.oa.matchers.EnrichMoreOpenAccess; -import eu.dnetlib.dhp.broker.oa.matchers.EnrichMorePid; -import eu.dnetlib.dhp.broker.oa.matchers.EnrichMoreProject; -import eu.dnetlib.dhp.broker.oa.matchers.EnrichMoreSoftware; -import eu.dnetlib.dhp.broker.oa.matchers.EnrichMoreSubject; import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher; +import eu.dnetlib.dhp.broker.oa.matchers.relatedDatasets.EnrichMissingDatasetIsReferencedBy; +import eu.dnetlib.dhp.broker.oa.matchers.relatedDatasets.EnrichMissingDatasetIsRelatedTo; +import eu.dnetlib.dhp.broker.oa.matchers.relatedDatasets.EnrichMissingDatasetIsSupplementedBy; +import eu.dnetlib.dhp.broker.oa.matchers.relatedDatasets.EnrichMissingDatasetIsSupplementedTo; +import eu.dnetlib.dhp.broker.oa.matchers.relatedDatasets.EnrichMissingDatasetReferences; +import eu.dnetlib.dhp.broker.oa.matchers.relatedProjects.EnrichMissingProject; +import eu.dnetlib.dhp.broker.oa.matchers.relatedProjects.EnrichMoreProject; +import eu.dnetlib.dhp.broker.oa.matchers.relatedPublications.EnrichMissingPublicationIsReferencedBy; +import eu.dnetlib.dhp.broker.oa.matchers.relatedPublications.EnrichMissingPublicationIsRelatedTo; +import eu.dnetlib.dhp.broker.oa.matchers.relatedPublications.EnrichMissingPublicationIsSupplementedBy; +import eu.dnetlib.dhp.broker.oa.matchers.relatedPublications.EnrichMissingPublicationIsSupplementedTo; +import eu.dnetlib.dhp.broker.oa.matchers.relatedPublications.EnrichMissingPublicationReferences; +import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMissingAbstract; +import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMissingAuthorOrcid; +import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMissingOpenAccess; +import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMissingPid; +import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMissingPublicationDate; +import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMissingSoftware; +import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMissingSubject; +import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMoreOpenAccess; +import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMorePid; +import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMoreSoftware; +import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMoreSubject; import eu.dnetlib.dhp.broker.oa.util.BrokerConstants; import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; import eu.dnetlib.dhp.common.HdfsSupport; diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingDatasetIsReferencedBy.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingDatasetIsReferencedBy.java deleted file mode 100644 index 3b9326fef2..0000000000 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingDatasetIsReferencedBy.java +++ /dev/null @@ -1,38 +0,0 @@ - -package eu.dnetlib.dhp.broker.oa.matchers; - -import java.util.List; - -import org.apache.commons.lang3.tuple.Pair; - -import eu.dnetlib.dhp.broker.model.Topic; -import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; -import eu.dnetlib.dhp.schema.oaf.Dataset; -import eu.dnetlib.dhp.schema.oaf.Result; - -public class EnrichMissingDatasetIsReferencedBy - extends UpdateMatcher>, eu.dnetlib.broker.objects.Dataset> { - - public EnrichMissingDatasetIsReferencedBy() { - super(true); - } - - @Override - protected List> findUpdates(final Pair> source, - final Pair> target) { - // TODO Auto-generated method stub - return null; - } - - @Override - protected UpdateInfo generateUpdateInfo( - final eu.dnetlib.broker.objects.Dataset highlightValue, - final Pair> source, - final Pair> target) { - return new UpdateInfo<>( - Topic.ENRICH_MISSING_DATASET_IS_REFERENCED_BY, - highlightValue, source.getLeft(), target.getLeft(), - (p, rel) -> p.getDatasets().add(rel), - rel -> rel.getInstances().get(0).getUrl()); - } -} diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingDatasetIsRelatedTo.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingDatasetIsRelatedTo.java deleted file mode 100644 index 35f7c52b42..0000000000 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingDatasetIsRelatedTo.java +++ /dev/null @@ -1,38 +0,0 @@ - -package eu.dnetlib.dhp.broker.oa.matchers; - -import java.util.List; - -import org.apache.commons.lang3.tuple.Pair; - -import eu.dnetlib.dhp.broker.model.Topic; -import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; -import eu.dnetlib.dhp.schema.oaf.Dataset; -import eu.dnetlib.dhp.schema.oaf.Result; - -public class EnrichMissingDatasetIsRelatedTo - extends UpdateMatcher>, eu.dnetlib.broker.objects.Dataset> { - - public EnrichMissingDatasetIsRelatedTo() { - super(true); - } - - @Override - protected List> findUpdates(final Pair> source, - final Pair> target) { - // TODO Auto-generated method stub - return null; - } - - @Override - protected UpdateInfo generateUpdateInfo( - final eu.dnetlib.broker.objects.Dataset highlightValue, - final Pair> source, - final Pair> target) { - return new UpdateInfo<>( - Topic.ENRICH_MISSING_DATASET_IS_RELATED_TO, - highlightValue, source.getLeft(), target.getLeft(), - (p, rel) -> p.getDatasets().add(rel), - rel -> rel.getInstances().get(0).getUrl()); - } -} diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingDatasetIsSupplementedBy.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingDatasetIsSupplementedBy.java deleted file mode 100644 index 1faa305b52..0000000000 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingDatasetIsSupplementedBy.java +++ /dev/null @@ -1,38 +0,0 @@ - -package eu.dnetlib.dhp.broker.oa.matchers; - -import java.util.List; - -import org.apache.commons.lang3.tuple.Pair; - -import eu.dnetlib.dhp.broker.model.Topic; -import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; -import eu.dnetlib.dhp.schema.oaf.Dataset; -import eu.dnetlib.dhp.schema.oaf.Result; - -public class EnrichMissingDatasetIsSupplementedBy - extends UpdateMatcher>, eu.dnetlib.broker.objects.Dataset> { - - public EnrichMissingDatasetIsSupplementedBy() { - super(true); - } - - @Override - protected List> findUpdates(final Pair> source, - final Pair> target) { - // TODO Auto-generated method stub - return null; - } - - @Override - protected UpdateInfo generateUpdateInfo( - final eu.dnetlib.broker.objects.Dataset highlightValue, - final Pair> source, - final Pair> target) { - return new UpdateInfo<>( - Topic.ENRICH_MISSING_DATASET_IS_SUPPLEMENTED_BY, - highlightValue, source.getLeft(), target.getLeft(), - (p, rel) -> p.getDatasets().add(rel), - rel -> rel.getInstances().get(0).getUrl()); - } -} diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingDatasetIsSupplementedTo.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingDatasetIsSupplementedTo.java deleted file mode 100644 index d1b067272d..0000000000 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingDatasetIsSupplementedTo.java +++ /dev/null @@ -1,38 +0,0 @@ - -package eu.dnetlib.dhp.broker.oa.matchers; - -import java.util.List; - -import org.apache.commons.lang3.tuple.Pair; - -import eu.dnetlib.dhp.broker.model.Topic; -import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; -import eu.dnetlib.dhp.schema.oaf.Dataset; -import eu.dnetlib.dhp.schema.oaf.Result; - -public class EnrichMissingDatasetIsSupplementedTo - extends UpdateMatcher>, eu.dnetlib.broker.objects.Dataset> { - - public EnrichMissingDatasetIsSupplementedTo() { - super(true); - } - - @Override - protected List> findUpdates(final Pair> source, - final Pair> target) { - // TODO Auto-generated method stub - return null; - } - - @Override - protected UpdateInfo generateUpdateInfo( - final eu.dnetlib.broker.objects.Dataset highlightValue, - final Pair> source, - final Pair> target) { - return new UpdateInfo<>( - Topic.ENRICH_MISSING_DATASET_IS_SUPPLEMENTED_TO, - highlightValue, source.getLeft(), target.getLeft(), - (p, rel) -> p.getDatasets().add(rel), - rel -> rel.getInstances().get(0).getUrl()); - } -} diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingDatasetReferences.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingDatasetReferences.java deleted file mode 100644 index ce6adeba27..0000000000 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingDatasetReferences.java +++ /dev/null @@ -1,38 +0,0 @@ - -package eu.dnetlib.dhp.broker.oa.matchers; - -import java.util.List; - -import org.apache.commons.lang3.tuple.Pair; - -import eu.dnetlib.dhp.broker.model.Topic; -import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; -import eu.dnetlib.dhp.schema.oaf.Dataset; -import eu.dnetlib.dhp.schema.oaf.Result; - -public class EnrichMissingDatasetReferences - extends UpdateMatcher>, eu.dnetlib.broker.objects.Dataset> { - - public EnrichMissingDatasetReferences() { - super(true); - } - - @Override - protected List> findUpdates(final Pair> source, - final Pair> target) { - // TODO Auto-generated method stub - return null; - } - - @Override - protected UpdateInfo generateUpdateInfo( - final eu.dnetlib.broker.objects.Dataset highlightValue, - final Pair> source, - final Pair> target) { - return new UpdateInfo<>( - Topic.ENRICH_MISSING_DATASET_REFERENCES, - highlightValue, source.getLeft(), target.getLeft(), - (p, rel) -> p.getDatasets().add(rel), - rel -> rel.getInstances().get(0).getUrl()); - } -} diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPublicationIsReferencedBy.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPublicationIsReferencedBy.java deleted file mode 100644 index 8bcee5a1fc..0000000000 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPublicationIsReferencedBy.java +++ /dev/null @@ -1,42 +0,0 @@ - -package eu.dnetlib.dhp.broker.oa.matchers; - -import java.util.Arrays; -import java.util.List; - -import org.apache.commons.lang3.tuple.Pair; - -import eu.dnetlib.dhp.broker.model.Topic; -import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; -import eu.dnetlib.dhp.schema.oaf.Publication; -import eu.dnetlib.dhp.schema.oaf.Result; - -public class EnrichMissingPublicationIsReferencedBy - extends UpdateMatcher>, eu.dnetlib.broker.objects.Publication> { - - public EnrichMissingPublicationIsReferencedBy() { - super(true); - } - - @Override - protected List> findUpdates( - final Pair> source, - final Pair> target) { - // TODO Auto-generated method stub - return Arrays.asList(); - } - - @Override - protected UpdateInfo generateUpdateInfo( - final eu.dnetlib.broker.objects.Publication highlightValue, - final Pair> source, - final Pair> target) { - return new UpdateInfo<>( - Topic.ENRICH_MISSING_PUBLICATION_IS_REFERENCED_BY, - highlightValue, source.getLeft(), target.getLeft(), - (p, rel) -> { - }, // p.getPublications().add(rel), //TODO available in the future release of dnet-openaire-broker-common - rel -> rel.getOriginalId()); - } - -} diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPublicationIsRelatedTo.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPublicationIsRelatedTo.java deleted file mode 100644 index 0c16f9f564..0000000000 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPublicationIsRelatedTo.java +++ /dev/null @@ -1,42 +0,0 @@ - -package eu.dnetlib.dhp.broker.oa.matchers; - -import java.util.Arrays; -import java.util.List; - -import org.apache.commons.lang3.tuple.Pair; - -import eu.dnetlib.dhp.broker.model.Topic; -import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; -import eu.dnetlib.dhp.schema.oaf.Publication; -import eu.dnetlib.dhp.schema.oaf.Result; - -public class EnrichMissingPublicationIsRelatedTo - extends UpdateMatcher>, eu.dnetlib.broker.objects.Publication> { - - public EnrichMissingPublicationIsRelatedTo() { - super(true); - } - - @Override - protected List> findUpdates( - final Pair> source, - final Pair> target) { - // TODO Auto-generated method stub - return Arrays.asList(); - } - - @Override - protected UpdateInfo generateUpdateInfo( - final eu.dnetlib.broker.objects.Publication highlightValue, - final Pair> source, - final Pair> target) { - return new UpdateInfo<>( - Topic.ENRICH_MISSING_PUBLICATION_IS_RELATED_TO, - highlightValue, source.getLeft(), target.getLeft(), - (p, rel) -> { - }, // p.getPublications().add(rel), //TODO available in the future release of dnet-openaire-broker-common - rel -> rel.getOriginalId()); - } - -} diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPublicationIsSupplementedBy.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPublicationIsSupplementedBy.java deleted file mode 100644 index 0b3c332708..0000000000 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPublicationIsSupplementedBy.java +++ /dev/null @@ -1,42 +0,0 @@ - -package eu.dnetlib.dhp.broker.oa.matchers; - -import java.util.Arrays; -import java.util.List; - -import org.apache.commons.lang3.tuple.Pair; - -import eu.dnetlib.dhp.broker.model.Topic; -import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; -import eu.dnetlib.dhp.schema.oaf.Publication; -import eu.dnetlib.dhp.schema.oaf.Result; - -public class EnrichMissingPublicationIsSupplementedBy - extends UpdateMatcher>, eu.dnetlib.broker.objects.Publication> { - - public EnrichMissingPublicationIsSupplementedBy() { - super(true); - } - - @Override - protected List> findUpdates( - final Pair> source, - final Pair> target) { - // TODO Auto-generated method stub - return Arrays.asList(); - } - - @Override - protected UpdateInfo generateUpdateInfo( - final eu.dnetlib.broker.objects.Publication highlightValue, - final Pair> source, - final Pair> target) { - return new UpdateInfo<>( - Topic.ENRICH_MISSING_PUBLICATION_IS_SUPPLEMENTED_BY, - highlightValue, source.getLeft(), target.getLeft(), - (p, rel) -> { - }, // p.getPublications().add(rel), //TODO available in the future release of dnet-openaire-broker-common - rel -> rel.getOriginalId()); - } - -} diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPublicationIsSupplementedTo.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPublicationIsSupplementedTo.java deleted file mode 100644 index 0e72a84239..0000000000 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPublicationIsSupplementedTo.java +++ /dev/null @@ -1,42 +0,0 @@ - -package eu.dnetlib.dhp.broker.oa.matchers; - -import java.util.Arrays; -import java.util.List; - -import org.apache.commons.lang3.tuple.Pair; - -import eu.dnetlib.dhp.broker.model.Topic; -import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; -import eu.dnetlib.dhp.schema.oaf.Publication; -import eu.dnetlib.dhp.schema.oaf.Result; - -public class EnrichMissingPublicationIsSupplementedTo - extends UpdateMatcher>, eu.dnetlib.broker.objects.Publication> { - - public EnrichMissingPublicationIsSupplementedTo() { - super(true); - } - - @Override - protected List> findUpdates( - final Pair> source, - final Pair> target) { - // TODO Auto-generated method stub - return Arrays.asList(); - } - - @Override - protected UpdateInfo generateUpdateInfo( - final eu.dnetlib.broker.objects.Publication highlightValue, - final Pair> source, - final Pair> target) { - return new UpdateInfo<>( - Topic.ENRICH_MISSING_PUBLICATION_IS_SUPPLEMENTED_TO, - highlightValue, source.getLeft(), target.getLeft(), - (p, rel) -> { - }, // p.getPublications().add(rel), //TODO available in the future release of dnet-openaire-broker-common - rel -> rel.getOriginalId()); - } - -} diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPublicationReferences.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPublicationReferences.java deleted file mode 100644 index 6d1124974e..0000000000 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPublicationReferences.java +++ /dev/null @@ -1,42 +0,0 @@ - -package eu.dnetlib.dhp.broker.oa.matchers; - -import java.util.Arrays; -import java.util.List; - -import org.apache.commons.lang3.tuple.Pair; - -import eu.dnetlib.dhp.broker.model.Topic; -import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; -import eu.dnetlib.dhp.schema.oaf.Publication; -import eu.dnetlib.dhp.schema.oaf.Result; - -public class EnrichMissingPublicationReferences - extends UpdateMatcher>, eu.dnetlib.broker.objects.Publication> { - - public EnrichMissingPublicationReferences() { - super(true); - } - - @Override - protected List> findUpdates( - final Pair> source, - final Pair> target) { - // TODO Auto-generated method stub - return Arrays.asList(); - } - - @Override - protected UpdateInfo generateUpdateInfo( - final eu.dnetlib.broker.objects.Publication highlightValue, - final Pair> source, - final Pair> target) { - return new UpdateInfo<>( - Topic.ENRICH_MISSING_PUBLICATION_REFERENCES, - highlightValue, source.getLeft(), target.getLeft(), - (p, rel) -> { - }, // p.getPublications().add(rel), //TODO available in the future release of dnet-openaire-broker-common - rel -> rel.getOriginalId()); - } - -} diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/AbstractEnrichMissingDataset.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/AbstractEnrichMissingDataset.java new file mode 100644 index 0000000000..321fd43184 --- /dev/null +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/AbstractEnrichMissingDataset.java @@ -0,0 +1,63 @@ + +package eu.dnetlib.dhp.broker.oa.matchers.relatedDatasets; + +import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; + +import org.apache.commons.lang3.tuple.Pair; + +import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher; +import eu.dnetlib.dhp.broker.oa.util.ConversionUtils; +import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; +import eu.dnetlib.dhp.schema.oaf.Dataset; +import eu.dnetlib.dhp.schema.oaf.Result; + +public abstract class AbstractEnrichMissingDataset + extends UpdateMatcher>, eu.dnetlib.broker.objects.Dataset> { + + private final Topic topic; + + public AbstractEnrichMissingDataset(final Topic topic) { + super(true); + this.topic = topic; + } + + @Override + protected final List> findUpdates( + final Pair> source, + final Pair> target) { + + final Set existingDatasets = target + .getRight() + .stream() + .map(Dataset::getId) + .collect(Collectors.toSet()); + + return source + .getRight() + .stream() + .filter(d -> !existingDatasets.contains(d.getId())) + .map(ConversionUtils::oafDatasetToBrokerDataset) + .map(i -> generateUpdateInfo(i, source, target)) + .collect(Collectors.toList()); + + } + + @Override + protected final UpdateInfo generateUpdateInfo( + final eu.dnetlib.broker.objects.Dataset highlightValue, + final Pair> source, + final Pair> target) { + return new UpdateInfo<>( + getTopic(), + highlightValue, source.getLeft(), target.getLeft(), + (p, rel) -> p.getDatasets().add(rel), + rel -> rel.getInstances().get(0).getUrl()); + } + + public Topic getTopic() { + return topic; + } +} diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetIsReferencedBy.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetIsReferencedBy.java new file mode 100644 index 0000000000..74ce761f48 --- /dev/null +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetIsReferencedBy.java @@ -0,0 +1,12 @@ + +package eu.dnetlib.dhp.broker.oa.matchers.relatedDatasets; + +import eu.dnetlib.dhp.broker.model.Topic; + +public class EnrichMissingDatasetIsReferencedBy extends AbstractEnrichMissingDataset { + + public EnrichMissingDatasetIsReferencedBy() { + super(Topic.ENRICH_MISSING_DATASET_IS_REFERENCED_BY); + } + +} diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetIsRelatedTo.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetIsRelatedTo.java new file mode 100644 index 0000000000..05a8910591 --- /dev/null +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetIsRelatedTo.java @@ -0,0 +1,12 @@ + +package eu.dnetlib.dhp.broker.oa.matchers.relatedDatasets; + +import eu.dnetlib.dhp.broker.model.Topic; + +public class EnrichMissingDatasetIsRelatedTo extends AbstractEnrichMissingDataset { + + public EnrichMissingDatasetIsRelatedTo() { + super(Topic.ENRICH_MISSING_DATASET_IS_RELATED_TO); + } + +} diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetIsSupplementedBy.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetIsSupplementedBy.java new file mode 100644 index 0000000000..23bd68fa1b --- /dev/null +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetIsSupplementedBy.java @@ -0,0 +1,12 @@ + +package eu.dnetlib.dhp.broker.oa.matchers.relatedDatasets; + +import eu.dnetlib.dhp.broker.model.Topic; + +public class EnrichMissingDatasetIsSupplementedBy extends AbstractEnrichMissingDataset { + + public EnrichMissingDatasetIsSupplementedBy() { + super(Topic.ENRICH_MISSING_DATASET_IS_SUPPLEMENTED_BY); + } + +} diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetIsSupplementedTo.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetIsSupplementedTo.java new file mode 100644 index 0000000000..03160b6f00 --- /dev/null +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetIsSupplementedTo.java @@ -0,0 +1,12 @@ + +package eu.dnetlib.dhp.broker.oa.matchers.relatedDatasets; + +import eu.dnetlib.dhp.broker.model.Topic; + +public class EnrichMissingDatasetIsSupplementedTo extends AbstractEnrichMissingDataset { + + public EnrichMissingDatasetIsSupplementedTo() { + super(Topic.ENRICH_MISSING_DATASET_IS_SUPPLEMENTED_TO); + } + +} diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetReferences.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetReferences.java new file mode 100644 index 0000000000..bf1df053d6 --- /dev/null +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetReferences.java @@ -0,0 +1,12 @@ + +package eu.dnetlib.dhp.broker.oa.matchers.relatedDatasets; + +import eu.dnetlib.dhp.broker.model.Topic; + +public class EnrichMissingDatasetReferences extends AbstractEnrichMissingDataset { + + public EnrichMissingDatasetReferences() { + super(Topic.ENRICH_MISSING_DATASET_REFERENCES); + } + +} diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingProject.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedProjects/EnrichMissingProject.java similarity index 90% rename from dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingProject.java rename to dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedProjects/EnrichMissingProject.java index 0197c99b1b..461266d567 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingProject.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedProjects/EnrichMissingProject.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.broker.oa.matchers; +package eu.dnetlib.dhp.broker.oa.matchers.relatedProjects; import java.util.Arrays; import java.util.List; @@ -7,6 +7,7 @@ import java.util.List; import org.apache.commons.lang3.tuple.Pair; import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher; import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; import eu.dnetlib.dhp.schema.oaf.Project; import eu.dnetlib.dhp.schema.oaf.Result; diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMoreProject.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedProjects/EnrichMoreProject.java similarity index 90% rename from dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMoreProject.java rename to dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedProjects/EnrichMoreProject.java index 4bf45d9437..d9bfb62d59 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMoreProject.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedProjects/EnrichMoreProject.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.broker.oa.matchers; +package eu.dnetlib.dhp.broker.oa.matchers.relatedProjects; import java.util.Arrays; import java.util.List; @@ -7,6 +7,7 @@ import java.util.List; import org.apache.commons.lang3.tuple.Pair; import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher; import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; import eu.dnetlib.dhp.schema.oaf.Project; import eu.dnetlib.dhp.schema.oaf.Result; diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/AbstractEnrichMissingPublication.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/AbstractEnrichMissingPublication.java new file mode 100644 index 0000000000..405b06ca6a --- /dev/null +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/AbstractEnrichMissingPublication.java @@ -0,0 +1,64 @@ + +package eu.dnetlib.dhp.broker.oa.matchers.relatedPublications; + +import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; + +import org.apache.commons.lang3.tuple.Pair; + +import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher; +import eu.dnetlib.dhp.broker.oa.util.ConversionUtils; +import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; +import eu.dnetlib.dhp.schema.oaf.Publication; +import eu.dnetlib.dhp.schema.oaf.Result; + +public abstract class AbstractEnrichMissingPublication + extends UpdateMatcher>, eu.dnetlib.broker.objects.Publication> { + + private final Topic topic; + + public AbstractEnrichMissingPublication(final Topic topic) { + super(true); + this.topic = topic; + } + + @Override + protected final List> findUpdates( + final Pair> source, + final Pair> target) { + + final Set existingPublications = target + .getRight() + .stream() + .map(Publication::getId) + .collect(Collectors.toSet()); + + return source + .getRight() + .stream() + .filter(d -> !existingPublications.contains(d.getId())) + .map(ConversionUtils::oafPublicationToBrokerPublication) + .map(i -> generateUpdateInfo(i, source, target)) + .collect(Collectors.toList()); + + } + + @Override + protected final UpdateInfo generateUpdateInfo( + final eu.dnetlib.broker.objects.Publication highlightValue, + final Pair> source, + final Pair> target) { + return new UpdateInfo<>( + getTopic(), + highlightValue, source.getLeft(), target.getLeft(), + (p, rel) -> { + }, // p.getPublications().add(rel), //TODO available in the future release of dnet-openaire-broker-common + rel -> rel.getInstances().get(0).getUrl()); + } + + public Topic getTopic() { + return topic; + } +} diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationIsReferencedBy.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationIsReferencedBy.java new file mode 100644 index 0000000000..73fa8a45f6 --- /dev/null +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationIsReferencedBy.java @@ -0,0 +1,12 @@ + +package eu.dnetlib.dhp.broker.oa.matchers.relatedPublications; + +import eu.dnetlib.dhp.broker.model.Topic; + +public class EnrichMissingPublicationIsReferencedBy extends AbstractEnrichMissingPublication { + + public EnrichMissingPublicationIsReferencedBy() { + super(Topic.ENRICH_MISSING_PUBLICATION_IS_REFERENCED_BY); + } + +} diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationIsRelatedTo.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationIsRelatedTo.java new file mode 100644 index 0000000000..361ea3b342 --- /dev/null +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationIsRelatedTo.java @@ -0,0 +1,12 @@ + +package eu.dnetlib.dhp.broker.oa.matchers.relatedPublications; + +import eu.dnetlib.dhp.broker.model.Topic; + +public class EnrichMissingPublicationIsRelatedTo extends AbstractEnrichMissingPublication { + + public EnrichMissingPublicationIsRelatedTo() { + super(Topic.ENRICH_MISSING_PUBLICATION_IS_RELATED_TO); + } + +} diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationIsSupplementedBy.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationIsSupplementedBy.java new file mode 100644 index 0000000000..7e8863b1ed --- /dev/null +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationIsSupplementedBy.java @@ -0,0 +1,12 @@ + +package eu.dnetlib.dhp.broker.oa.matchers.relatedPublications; + +import eu.dnetlib.dhp.broker.model.Topic; + +public class EnrichMissingPublicationIsSupplementedBy extends AbstractEnrichMissingPublication { + + public EnrichMissingPublicationIsSupplementedBy() { + super(Topic.ENRICH_MISSING_PUBLICATION_IS_SUPPLEMENTED_BY); + } + +} diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationIsSupplementedTo.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationIsSupplementedTo.java new file mode 100644 index 0000000000..dc4e513777 --- /dev/null +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationIsSupplementedTo.java @@ -0,0 +1,12 @@ + +package eu.dnetlib.dhp.broker.oa.matchers.relatedPublications; + +import eu.dnetlib.dhp.broker.model.Topic; + +public class EnrichMissingPublicationIsSupplementedTo extends AbstractEnrichMissingPublication { + + public EnrichMissingPublicationIsSupplementedTo() { + super(Topic.ENRICH_MISSING_PUBLICATION_IS_SUPPLEMENTED_TO); + } + +} diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationReferences.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationReferences.java new file mode 100644 index 0000000000..5198098bcf --- /dev/null +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationReferences.java @@ -0,0 +1,12 @@ + +package eu.dnetlib.dhp.broker.oa.matchers.relatedPublications; + +import eu.dnetlib.dhp.broker.model.Topic; + +public class EnrichMissingPublicationReferences extends AbstractEnrichMissingPublication { + + public EnrichMissingPublicationReferences() { + super(Topic.ENRICH_MISSING_PUBLICATION_REFERENCES); + } + +} diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingAbstract.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingAbstract.java similarity index 90% rename from dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingAbstract.java rename to dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingAbstract.java index 6dab6355fd..a418b633ed 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingAbstract.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingAbstract.java @@ -1,11 +1,12 @@ -package eu.dnetlib.dhp.broker.oa.matchers; +package eu.dnetlib.dhp.broker.oa.matchers.simple; import java.util.ArrayList; import java.util.Arrays; import java.util.List; import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher; import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; import eu.dnetlib.dhp.schema.oaf.Result; diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingAuthorOrcid.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingAuthorOrcid.java similarity index 90% rename from dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingAuthorOrcid.java rename to dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingAuthorOrcid.java index c7146ad79a..b5c2f7e723 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingAuthorOrcid.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingAuthorOrcid.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.broker.oa.matchers; +package eu.dnetlib.dhp.broker.oa.matchers.simple; import java.util.Arrays; import java.util.List; @@ -7,6 +7,7 @@ import java.util.List; import org.apache.commons.lang3.tuple.Pair; import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher; import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; import eu.dnetlib.dhp.schema.oaf.Result; diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingOpenAccess.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingOpenAccess.java similarity index 93% rename from dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingOpenAccess.java rename to dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingOpenAccess.java index 81263c6c3f..c7e9dcbc11 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingOpenAccess.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingOpenAccess.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.broker.oa.matchers; +package eu.dnetlib.dhp.broker.oa.matchers.simple; import java.util.Arrays; import java.util.List; @@ -7,6 +7,7 @@ import java.util.stream.Collectors; import eu.dnetlib.broker.objects.Instance; import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher; import eu.dnetlib.dhp.broker.oa.util.BrokerConstants; import eu.dnetlib.dhp.broker.oa.util.ConversionUtils; import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPid.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPid.java similarity index 91% rename from dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPid.java rename to dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPid.java index 5f10bb4d9e..522d46d402 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPid.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPid.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.broker.oa.matchers; +package eu.dnetlib.dhp.broker.oa.matchers.simple; import java.util.Arrays; import java.util.List; @@ -7,6 +7,7 @@ import java.util.stream.Collectors; import eu.dnetlib.broker.objects.Pid; import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher; import eu.dnetlib.dhp.broker.oa.util.ConversionUtils; import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; import eu.dnetlib.dhp.schema.oaf.Result; diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPublicationDate.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPublicationDate.java similarity index 90% rename from dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPublicationDate.java rename to dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPublicationDate.java index 19ef2bab7e..197ace97c8 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPublicationDate.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPublicationDate.java @@ -1,11 +1,12 @@ -package eu.dnetlib.dhp.broker.oa.matchers; +package eu.dnetlib.dhp.broker.oa.matchers.simple; import java.util.ArrayList; import java.util.Arrays; import java.util.List; import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher; import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; import eu.dnetlib.dhp.schema.oaf.Result; diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingSoftware.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingSoftware.java similarity index 91% rename from dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingSoftware.java rename to dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingSoftware.java index 954ee48be7..4fcba43a40 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingSoftware.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingSoftware.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.broker.oa.matchers; +package eu.dnetlib.dhp.broker.oa.matchers.simple; import java.util.Arrays; import java.util.List; @@ -7,6 +7,7 @@ import java.util.List; import org.apache.commons.lang3.tuple.Pair; import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher; import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; import eu.dnetlib.dhp.schema.oaf.Result; import eu.dnetlib.dhp.schema.oaf.Software; diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingSubject.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingSubject.java similarity index 93% rename from dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingSubject.java rename to dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingSubject.java index a7c72f6ea3..290bad48b3 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingSubject.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingSubject.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.broker.oa.matchers; +package eu.dnetlib.dhp.broker.oa.matchers.simple; import java.util.List; import java.util.Set; @@ -8,6 +8,7 @@ import java.util.stream.Collectors; import org.apache.commons.lang3.tuple.Pair; import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher; import eu.dnetlib.dhp.broker.oa.util.ConversionUtils; import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; import eu.dnetlib.dhp.schema.oaf.Qualifier; diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMoreOpenAccess.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMoreOpenAccess.java similarity index 93% rename from dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMoreOpenAccess.java rename to dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMoreOpenAccess.java index 2cd2775c9e..c376da44d2 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMoreOpenAccess.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMoreOpenAccess.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.broker.oa.matchers; +package eu.dnetlib.dhp.broker.oa.matchers.simple; import java.util.List; import java.util.Set; @@ -7,6 +7,7 @@ import java.util.stream.Collectors; import eu.dnetlib.broker.objects.Instance; import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher; import eu.dnetlib.dhp.broker.oa.util.BrokerConstants; import eu.dnetlib.dhp.broker.oa.util.ConversionUtils; import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMorePid.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMorePid.java similarity index 92% rename from dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMorePid.java rename to dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMorePid.java index 048d19747d..2ee327c831 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMorePid.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMorePid.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.broker.oa.matchers; +package eu.dnetlib.dhp.broker.oa.matchers.simple; import java.util.List; import java.util.Set; @@ -7,6 +7,7 @@ import java.util.stream.Collectors; import eu.dnetlib.broker.objects.Pid; import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher; import eu.dnetlib.dhp.broker.oa.util.ConversionUtils; import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; import eu.dnetlib.dhp.schema.oaf.Result; diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMoreSoftware.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMoreSoftware.java similarity index 91% rename from dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMoreSoftware.java rename to dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMoreSoftware.java index 9760504f64..a1affff623 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMoreSoftware.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMoreSoftware.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.broker.oa.matchers; +package eu.dnetlib.dhp.broker.oa.matchers.simple; import java.util.Arrays; import java.util.List; @@ -7,6 +7,7 @@ import java.util.List; import org.apache.commons.lang3.tuple.Pair; import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher; import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; import eu.dnetlib.dhp.schema.oaf.Result; import eu.dnetlib.dhp.schema.oaf.Software; diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMoreSubject.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMoreSubject.java similarity index 93% rename from dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMoreSubject.java rename to dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMoreSubject.java index 67c2f01161..b38445e885 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMoreSubject.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMoreSubject.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.broker.oa.matchers; +package eu.dnetlib.dhp.broker.oa.matchers.simple; import java.util.List; import java.util.Set; @@ -8,6 +8,7 @@ import java.util.stream.Collectors; import org.apache.commons.lang3.tuple.Pair; import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher; import eu.dnetlib.dhp.broker.oa.util.ConversionUtils; import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; import eu.dnetlib.dhp.schema.oaf.Result; diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ConversionUtils.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ConversionUtils.java index 2e2ce202ad..2f87d0ee76 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ConversionUtils.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ConversionUtils.java @@ -7,6 +7,8 @@ import org.apache.commons.lang3.tuple.Pair; import eu.dnetlib.broker.objects.Instance; import eu.dnetlib.broker.objects.Pid; +import eu.dnetlib.dhp.schema.oaf.Dataset; +import eu.dnetlib.dhp.schema.oaf.Publication; import eu.dnetlib.dhp.schema.oaf.StructuredProperty; public class ConversionUtils { @@ -33,4 +35,15 @@ public class ConversionUtils { return Pair.of(sp.getQualifier().getClassid(), sp.getValue()); } + public static final eu.dnetlib.broker.objects.Dataset oafDatasetToBrokerDataset(final Dataset d) { + final eu.dnetlib.broker.objects.Dataset res = new eu.dnetlib.broker.objects.Dataset(); + // TODO + return res; + } + + public static final eu.dnetlib.broker.objects.Publication oafPublicationToBrokerPublication(final Publication d) { + final eu.dnetlib.broker.objects.Publication res = new eu.dnetlib.broker.objects.Publication(); + // TODO + return res; + } } From 0fd0c7d72520f97322504f405496358a3778ecdd Mon Sep 17 00:00:00 2001 From: miconis Date: Fri, 22 May 2020 17:24:57 +0200 Subject: [PATCH 16/37] reimplementation of the sim between two authors. now it takes into account both name and surname. threshold incremented to 1.0 if the name is too short --- .../eu/dnetlib/dhp/oa/dedup/AuthorMerger.java | 41 +++++++++++-------- .../dhp/oa/dedup/EntityMergerTest.java | 13 ++++++ .../dhp/dedup/json/publication_merge2.json | 4 ++ 3 files changed, 40 insertions(+), 18 deletions(-) create mode 100644 dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/json/publication_merge2.json diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/AuthorMerger.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/AuthorMerger.java index 43df19f8a4..5e63c4b656 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/AuthorMerger.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/AuthorMerger.java @@ -20,12 +20,7 @@ public class AuthorMerger { public static List merge(List> authors) { - authors.sort(new Comparator>() { - @Override - public int compare(List o1, List o2) { - return -Integer.compare(countAuthorsPids(o1), countAuthorsPids(o2)); - } - }); + authors.sort((o1, o2) -> -Integer.compare(countAuthorsPids(o1), countAuthorsPids(o2))); List author = new ArrayList<>(); @@ -86,20 +81,28 @@ public class AuthorMerger { .stream() .map(ba -> new Tuple2<>(sim(ba, a._2()), ba)) .max(Comparator.comparing(Tuple2::_1)); - if (simAuthor.isPresent() && simAuthor.get()._1() > THRESHOLD) { - Author r = simAuthor.get()._2(); - if (r.getPid() == null) { - r.setPid(new ArrayList<>()); + + if(simAuthor.isPresent()) { + double th = THRESHOLD; + //increase the threshold if the surname is too short + if (simAuthor.get()._2().getSurname() != null && simAuthor.get()._2().getSurname().length()<=3) + th = 0.99; + + if (simAuthor.get()._1() > th) { + Author r = simAuthor.get()._2(); + if (r.getPid() == null) { + r.setPid(new ArrayList<>()); + } + r.getPid().add(a._1()); } - r.getPid().add(a._1()); } }); } public static String pidToComparableString(StructuredProperty pid) { - return (pid.getQualifier() != null - ? pid.getQualifier().getClassid() != null ? pid.getQualifier().getClassid().toLowerCase() : "" - : "") + (pid.getValue() != null ? pid.getValue().toLowerCase() : ""); + return (pid.getQualifier() != null ? + pid.getQualifier().getClassid() != null ? pid.getQualifier().getClassid().toLowerCase() : "" : "") + + (pid.getValue() != null ? pid.getValue().toLowerCase() : ""); } public static int countAuthorsPids(List authors) { @@ -120,12 +123,14 @@ public class AuthorMerger { final Person pa = parse(a); final Person pb = parse(b); + //if both are accurate (e.g. they have name and surname) if (pa.isAccurate() & pb.isAccurate()) { - return new JaroWinkler() - .score(normalize(pa.getSurnameString()), normalize(pb.getSurnameString())); + return + new JaroWinkler().score(normalize(pa.getSurnameString()), normalize(pb.getSurnameString()))*0.5 + + new JaroWinkler().score(normalize(pa.getNameString()), normalize(pb.getNameString()))*0.5; } else { - return new JaroWinkler() - .score(normalize(pa.getNormalisedFullname()), normalize(pb.getNormalisedFullname())); + return + new JaroWinkler().score(normalize(pa.getNormalisedFullname()), normalize(pb.getNormalisedFullname())); } } diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/EntityMergerTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/EntityMergerTest.java index 0a3bf62ea8..55879030be 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/EntityMergerTest.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/EntityMergerTest.java @@ -21,6 +21,7 @@ import scala.Tuple2; public class EntityMergerTest implements Serializable { List> publications; + List> publications2; String testEntityBasePath; DataInfo dataInfo; @@ -36,6 +37,7 @@ public class EntityMergerTest implements Serializable { .getAbsolutePath(); publications = readSample(testEntityBasePath + "/publication_merge.json", Publication.class); + publications2 = readSample(testEntityBasePath + "/publication_merge2.json", Publication.class); pub_top = getTopPub(publications); @@ -90,6 +92,17 @@ public class EntityMergerTest implements Serializable { assertEquals(AuthorMerger.countAuthorsPids(pub_merged.getAuthor()), 4); } + @Test + public void publicationMergerTest2() throws InstantiationException, IllegalAccessException, IOException { + + Publication pub_merged = DedupRecordFactory + .entityMerger(dedupId, publications2.iterator(), 0, dataInfo, Publication.class); + + assertEquals(pub_merged.getAuthor().size(), 27); + // insert assertions here + + } + public DataInfo setDI() { DataInfo dataInfo = new DataInfo(); dataInfo.setTrust("0.9"); diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/json/publication_merge2.json b/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/json/publication_merge2.json new file mode 100644 index 0000000000..a7937c2873 --- /dev/null +++ b/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/json/publication_merge2.json @@ -0,0 +1,4 @@ +{"id":"50|doajarticles::842fa3b99fcdccafb4d5c8a815f56efa","dateofcollection":"2020-04-06T12:22:31.216Z","title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Increased Expression of Herpes Virus-Encoded hsv1-miR-H18 and hsv2-miR-H9-5p in Cancer-Containing Prostate Tissue Compared to That in Benign Prostate Hyperplasia Tissue"}],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Korean Continence Society"},"bestaccessright":null,"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"collectedfrom":[{"dataInfo":null,"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.5213/inj.1632552.276"}],"author":[{"affiliation":null,"fullname":"Seok Joong Yun","name":null,"pid":[],"rank":1,"surname":null},{"affiliation":null,"fullname":"Pildu Jeong","name":null,"pid":[],"rank":2,"surname":null},{"affiliation":null,"fullname":"Ho Won Kang","name":null,"pid":[],"rank":3,"surname":null},{"affiliation":null,"fullname":"Helen Ki Shinn","name":null,"pid":[],"rank":4,"surname":null},{"affiliation":null,"fullname":"Ye-Hwan Kim","name":null,"pid":[],"rank":5,"surname":null},{"affiliation":null,"fullname":"Chunri Yan","name":null,"pid":[],"rank":6,"surname":null},{"affiliation":null,"fullname":"Young-Ki Choi","name":null,"pid":[],"rank":7,"surname":null},{"affiliation":null,"fullname":"Dongho Kim","name":null,"pid":[],"rank":8,"surname":null},{"affiliation":null,"fullname":"Dong Hee Ryu","name":null,"pid":[],"rank":9,"surname":null},{"affiliation":null,"fullname":"Yun-Sok Ha","name":null,"pid":[],"rank":10,"surname":null},{"affiliation":null,"fullname":"Tae-Hwan Kim","name":null,"pid":[],"rank":11,"surname":null},{"affiliation":null,"fullname":"Tae Gyun Kwon","name":null,"pid":[],"rank":12,"surname":null},{"affiliation":null,"fullname":"Jung Min Kim","name":null,"pid":[],"rank":13,"surname":null},{"affiliation":null,"fullname":"Sang Heon Suh","name":null,"pid":[],"rank":14,"surname":null},{"affiliation":null,"fullname":"Seon-Kyu Kim","name":null,"pid":[],"rank":15,"surname":null},{"affiliation":null,"fullname":"Seon-Young Kim","name":null,"pid":[],"rank":16,"surname":null},{"affiliation":null,"fullname":"Sang Tae Kim","name":null,"pid":[],"rank":17,"surname":null},{"affiliation":null,"fullname":"Won Tae Kim","name":null,"pid":[],"rank":18,"surname":null},{"affiliation":null,"fullname":"Ok-Jun Lee","name":null,"pid":[],"rank":19,"surname":null},{"affiliation":null,"fullname":"Sung-Kwon Moon","name":null,"pid":[],"rank":20,"surname":null},{"affiliation":null,"fullname":"Nam-Hyung Kim","name":null,"pid":[],"rank":21,"surname":null},{"affiliation":null,"fullname":"Isaac Yi Kim","name":null,"pid":[],"rank":22,"surname":null},{"affiliation":null,"fullname":"Jayoung Kim","name":null,"pid":[],"rank":23,"surname":null},{"affiliation":null,"fullname":"Hee-Jae Cha","name":null,"pid":[],"rank":24,"surname":null},{"affiliation":null,"fullname":"Yung-Hyun Choi","name":null,"pid":[],"rank":25,"surname":null},{"affiliation":null,"fullname":"Eun-Jong Cha","name":null,"pid":[],"rank":26,"surname":null},{"affiliation":null,"fullname":"Wun-Jae Kim","name":null,"pid":[],"rank":27,"surname":null}],"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"country":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"MicroRNAs"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Prostate Neoplasms"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Herpesviridae"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Prostate Hyperplasia"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Diseases of the genitourinary system. Urology"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"RC870-923"}],"description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Purpose: Previously, we reported the presence of virus-encoded microRNAs (miRNAs) in the urine of prostate cancer (CaP) patients. In this study, we investigated the expression of two herpes virus-encoded miRNAs in prostate tissue. Methods: A total of 175 tissue samples from noncancerous benign prostatic hyperplasia (BPH), 248 tissue samples from patients with CaP and BPH, and 50 samples from noncancerous surrounding tissues from these same patients were analyzed for the expression of two herpes virus-encoded miRNAs by real-time polymerase chain reaction (PCR) and immunocytochemistry using nanoparticles as molecular beacons. Results: Real-time reverse transcription-PCR results revealed significantly higher expression of hsv1-miR-H18 and hsv2-miRH9- 5p in surrounding noncancerous and CaP tissues than that in BPH tissue (each comparison, P<0.001). Of note, these miRNA were expressed equivalently in the CaP tissues and surrounding noncancerous tissues. Moreover, immunocytochemistry clearly demonstrated a significant enrichment of both hsv1-miR-H18 and hsv2-miR-H9 beacon-labeled cells in CaP and surrounding noncancerous tissue compared to that in BPH tissue (each comparison, P<0.05 for hsv1-miR-H18 and hsv2- miR-H9). Conclusions: These results suggest that increased expression of hsv1-miR-H18 and hsv2-miR-H95p might be associated with tumorigenesis in the prostate. Further studies will be required to elucidate the role of these miRNAs with respect to CaP and herpes viral infections."}],"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2016-06-01"},"embargoenddate":null,"resourcetype":null,"context":[],"instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2016-06-01"},"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|doajarticles::52db9a4f8e176f6e8e1d9f0c1e0a2de0","value":"International Neurourology Journal"},"instancetype":{"classid":"0001","classname":"peerReviewed","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":null,"processingchargeamount":null,"processingchargecurrency":null,"refereed":null,"url":["http://www.einj.org/upload/pdf/inj-1632552-276.pdf","https://doaj.org/toc/2093-4777","https://doaj.org/toc/2093-6931"]}]} +{"id":"50|od_______267::b5f5da11a8239ef57655cea8675cb466","dateofcollection":"","title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Increased Expression of Herpes Virus-Encoded hsv1-miR-H18 and hsv2-miR-H9-5p in Cancer-Containing Prostate Tissue Compared to That in Benign Prostate Hyperplasia Tissue"}],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Korean Continence Society"},"bestaccessright":null,"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"collectedfrom":[{"dataInfo":null,"key":"10|opendoar____::eda80a3d5b344bc40f3bc04f65b7a357","value":"PubMed Central"}],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.5213/inj.1632552.276"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"pmc","classname":"pmc","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"PMC4932644"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"pmid","classname":"pmid","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"27377944"}],"author":[{"affiliation":null,"fullname":"Yun, Seok Joong","name":"Seok Joong","pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"0000-0001-7737-4746"}],"rank":1,"surname":"Yun"},{"affiliation":null,"fullname":"Jeong, Pildu","name":"Pildu","pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"0000-0002-5602-5376"}],"rank":2,"surname":"Jeong"},{"affiliation":null,"fullname":"Kang, Ho Won","name":"Ho Won","pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"0000-0002-8164-4427"}],"rank":3,"surname":"Kang"},{"affiliation":null,"fullname":"Shinn, Helen Ki","name":"Helen Ki","pid":[],"rank":4,"surname":"Shinn"},{"affiliation":null,"fullname":"Kim, Ye-Hwan","name":"Ye-Hwan","pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"0000-0002-8676-7119"}],"rank":5,"surname":"Kim"},{"affiliation":null,"fullname":"Yan, Chunri","name":"Chunri","pid":[],"rank":6,"surname":"Yan"},{"affiliation":null,"fullname":"Choi, Young-Ki","name":"Young-Ki","pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"0000-0002-1894-9869"}],"rank":7,"surname":"Choi"},{"affiliation":null,"fullname":"Kim, Dongho","name":"Dongho","pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"0000-0002-1409-3311"}],"rank":8,"surname":"Kim"},{"affiliation":null,"fullname":"Ryu, Dong Hee","name":"Dong Hee","pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"0000-0001-6088-298X"}],"rank":9,"surname":"Ryu"},{"affiliation":null,"fullname":"Ha, Yun-Sok","name":"Yun-Sok","pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"0000-0003-3732-9814"}],"rank":10,"surname":"Ha"},{"affiliation":null,"fullname":"Kim, Tae-Hwan","name":"Tae-Hwan","pid":[],"rank":11,"surname":"Kim"},{"affiliation":null,"fullname":"Kwon, Tae Gyun","name":"Tae Gyun","pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"0000-0002-4390-0952"}],"rank":12,"surname":"Kwon"},{"affiliation":null,"fullname":"Kim, Jung Min","name":"Jung Min","pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"0000-0001-6319-0217"}],"rank":13,"surname":"Kim"},{"affiliation":null,"fullname":"Suh, Sang Heon","name":"Sang Heon","pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"0000-0003-4560-8880"}],"rank":14,"surname":"Suh"},{"affiliation":null,"fullname":"Kim, Seon-Kyu","name":"Seon-Kyu","pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"0000-0002-4176-5187"}],"rank":15,"surname":"Kim"},{"affiliation":null,"fullname":"Kim, Seon-Young","name":"Seon-Young","pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"0000-0002-1030-7730"}],"rank":16,"surname":"Kim"},{"affiliation":null,"fullname":"Kim, Sang Tae","name":"Sang Tae","pid":[],"rank":17,"surname":"Kim"},{"affiliation":null,"fullname":"Kim, Won Tae","name":"Won Tae","pid":[],"rank":18,"surname":"Kim"},{"affiliation":null,"fullname":"Lee, Ok-Jun","name":"Ok-Jun","pid":[],"rank":19,"surname":"Lee"},{"affiliation":null,"fullname":"Moon, Sung-Kwon","name":"Sung-Kwon","pid":[],"rank":20,"surname":"Moon"},{"affiliation":null,"fullname":"Kim, Nam-Hyung","name":"Nam-Hyung","pid":[],"rank":21,"surname":"Kim"},{"affiliation":null,"fullname":"Kim, Isaac Yi","name":"Isaac Yi","pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"0000-0002-1967-5281"}],"rank":22,"surname":"Kim"},{"affiliation":null,"fullname":"Kim, Jayoung","name":"Jayoung","pid":[],"rank":23,"surname":"Kim"},{"affiliation":null,"fullname":"Cha, Hee-Jae","name":"Hee-Jae","pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"0000-0002-6963-2685"}],"rank":24,"surname":"Cha"},{"affiliation":null,"fullname":"Choi, Yung-Hyun","name":"Yung-Hyun","pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"0000-0002-1454-3124"}],"rank":25,"surname":"Choi"},{"affiliation":null,"fullname":"Cha, Eun-Jong","name":"Eun-Jong","pid":[],"rank":26,"surname":"Cha"},{"affiliation":null,"fullname":"Kim, Wun-Jae","name":"Wun-Jae","pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"0000-0002-8060-8926"}],"rank":27,"surname":"Kim"}],"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"country":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Original Article"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Fundamental Science for Neurourology"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"MicroRNAs"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Prostate Neoplasms"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Herpesviridae"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Prostate Hyperplasia"}],"description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Purpose: Previously, we reported the presence of virus-encoded microRNAs (miRNAs) in the urine of prostate cancer (CaP) patients. In this study, we investigated the expression of two herpes virus-encoded miRNAs in prostate tissue. Methods: A total of 175 tissue samples from noncancerous benign prostatic hyperplasia (BPH), 248 tissue samples from patients with CaP and BPH, and 50 samples from noncancerous surrounding tissues from these same patients were analyzed for the expression of two herpes virus-encoded miRNAs by real-time polymerase chain reaction (PCR) and immunocytochemistry using nanoparticles as molecular beacons. Results: Real-time reverse transcription-PCR results revealed significantly higher expression of hsv1-miR-H18 and hsv2-miRH9- 5p in surrounding noncancerous and CaP tissues than that in BPH tissue (each comparison, P<0.001). Of note, these miRNA were expressed equivalently in the CaP tissues and surrounding noncancerous tissues. Moreover, immunocytochemistry clearly demonstrated a significant enrichment of both hsv1-miR-H18 and hsv2-miR-H9 beacon-labeled cells in CaP and surrounding noncancerous tissue compared to that in BPH tissue (each comparison, P<0.05 for hsv1-miR-H18 and hsv2- miR-H9). Conclusions: These results suggest that increased expression of hsv1-miR-H18 and hsv2-miR-H95p might be associated with tumorigenesis in the prostate. Further studies will be required to elucidate the role of these miRNAs with respect to CaP and herpes viral infections."}],"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2016-06-01"},"embargoenddate":null,"resourcetype":null,"context":[],"instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|opendoar____::eda80a3d5b344bc40f3bc04f65b7a357","value":"PubMed Central"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2016-06-01"},"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|opendoar____::8b6dd7db9af49e67306feb59a8bdc52c","value":"Europe PubMed Central"},"instancetype":{"classid":"0001","classname":"peerReviewed","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":null,"processingchargeamount":null,"processingchargecurrency":null,"refereed":null,"url":["http://europepmc.org/articles/PMC4932644"]}]} +{"id":"50|doiboost____::0ca46ff10b2b4c756191719d85302b14","dateofcollection":"2019-02-15","title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Increased Expression of Herpes Virus-Encoded hsv1-miR-H18 and hsv2-miR-H9-5p in Cancer-Containing Prostate Tissue Compared to That in Benign Prostate Hyperplasia Tissue"}],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"collectedfrom":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"key":"10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a","value":"Microsoft Academic Graph"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"key":"10|openaire____::8ac8380272269217cb09a928c8caa993","value":"UnpayWall"}],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.5213/inj.1632552.276"}],"author":[{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"Chungbuk National University"}],"fullname":"Seok Joong Yun","name":"Seok Joong","pid":[{"dataInfo":null,"qualifier":{"classid":"MAG Identifier","classname":"MAG Identifier","schemeid":null,"schemename":null},"value":"2105974574"}],"rank":1,"surname":"Yun"},{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"Chungbuk National University"}],"fullname":"Pildu Jeong","name":"Pildu","pid":[{"dataInfo":null,"qualifier":{"classid":"MAG Identifier","classname":"MAG Identifier","schemeid":null,"schemename":null},"value":"2041919263"}],"rank":2,"surname":"Jeong"},{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"Chungbuk National University"}],"fullname":"Ho Won Kang","name":"Ho Won","pid":[{"dataInfo":null,"qualifier":{"classid":"MAG Identifier","classname":"MAG Identifier","schemeid":null,"schemename":null},"value":"2164408067"}],"rank":3,"surname":"Kang"},{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"Inha University"}],"fullname":"Helen Ki Shinn","name":"Helen Ki","pid":[{"dataInfo":null,"qualifier":{"classid":"MAG Identifier","classname":"MAG Identifier","schemeid":null,"schemename":null},"value":"2045077081"}],"rank":4,"surname":"Shinn"},{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"Chungbuk National University"}],"fullname":"Ye-Hwan Kim","name":"Ye-Hwan","pid":[{"dataInfo":null,"qualifier":{"classid":"MAG Identifier","classname":"MAG Identifier","schemeid":null,"schemename":null},"value":"2276303457"}],"rank":5,"surname":"Kim"},{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"Chungbuk National University"}],"fullname":"Chunri Yan","name":"Chunri","pid":[{"dataInfo":null,"qualifier":{"classid":"MAG Identifier","classname":"MAG Identifier","schemeid":null,"schemename":null},"value":"2186750404"}],"rank":6,"surname":"Yan"},{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"Chungbuk National University"}],"fullname":"Young-Ki Choi","name":"Young-Ki","pid":[{"dataInfo":null,"qualifier":{"classid":"MAG Identifier","classname":"MAG Identifier","schemeid":null,"schemename":null},"value":"2311466124"}],"rank":7,"surname":"Choi"},{"affiliation":[],"fullname":"Dongho Kim","name":"Dongho","pid":[{"dataInfo":null,"qualifier":{"classid":"MAG Identifier","classname":"MAG Identifier","schemeid":null,"schemename":null},"value":"2644843893"}],"rank":8,"surname":"Kim"},{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"Chungbuk National University"}],"fullname":"Dong Hee Ryu","name":"Dong Hee","pid":[{"dataInfo":null,"qualifier":{"classid":"MAG Identifier","classname":"MAG Identifier","schemeid":null,"schemename":null},"value":"2117604941"}],"rank":9,"surname":"Ryu"},{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"Kyungpook National University"}],"fullname":"Yun-Sok Ha","name":"Yun-Sok","pid":[{"dataInfo":null,"qualifier":{"classid":"MAG Identifier","classname":"MAG Identifier","schemeid":null,"schemename":null},"value":"2145233282"}],"rank":10,"surname":"Ha"},{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"Kyungpook National University"}],"fullname":"Tae-Hwan Kim","name":"Tae-Hwan","pid":[{"dataInfo":null,"qualifier":{"classid":"MAG Identifier","classname":"MAG Identifier","schemeid":null,"schemename":null},"value":"2509096378"}],"rank":11,"surname":"Kim"},{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"Kyungpook National University"}],"fullname":"Tae Gyun Kwon","name":"Tae Gyun","pid":[{"dataInfo":null,"qualifier":{"classid":"MAG Identifier","classname":"MAG Identifier","schemeid":null,"schemename":null},"value":"1978978081"}],"rank":12,"surname":"Kwon"},{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"Daejeon University"}],"fullname":"Jung Min Kim","name":"Jung Min","pid":[{"dataInfo":null,"qualifier":{"classid":"MAG Identifier","classname":"MAG Identifier","schemeid":null,"schemename":null},"value":"2265841962"}],"rank":13,"surname":"Kim"},{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"KAIST"}],"fullname":"Sang Heon Suh","name":"Sang Heon","pid":[{"dataInfo":null,"qualifier":{"classid":"MAG Identifier","classname":"MAG Identifier","schemeid":null,"schemename":null},"value":"2890693470"}],"rank":14,"surname":"Suh"},{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"Korea Research Institute of Bioscience and Biotechnology"}],"fullname":"Seon-Kyu Kim","name":"Seon-Kyu","pid":[{"dataInfo":null,"qualifier":{"classid":"MAG Identifier","classname":"MAG Identifier","schemeid":null,"schemename":null},"value":"2162364977"}],"rank":15,"surname":"Kim"},{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"Korea Research Institute of Bioscience and Biotechnology"}],"fullname":"Seon-Young Kim","name":"Seon-Young","pid":[{"dataInfo":null,"qualifier":{"classid":"MAG Identifier","classname":"MAG Identifier","schemeid":null,"schemename":null},"value":"2344797375"}],"rank":16,"surname":"Kim"},{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"Seoul National University Bundang Hospital"}],"fullname":"Sang Tae Kim","name":"Sang Tae","pid":[{"dataInfo":null,"qualifier":{"classid":"MAG Identifier","classname":"MAG Identifier","schemeid":null,"schemename":null},"value":"2257827509"}],"rank":17,"surname":"Kim"},{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"Chungbuk National University"}],"fullname":"Won Tae Kim","name":"Won Tae","pid":[{"dataInfo":null,"qualifier":{"classid":"MAG Identifier","classname":"MAG Identifier","schemeid":null,"schemename":null},"value":"2617237649"}],"rank":18,"surname":"Kim"},{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"Chungbuk National University"}],"fullname":"Ok-Jun Lee","name":"Ok-Jun","pid":[{"dataInfo":null,"qualifier":{"classid":"MAG Identifier","classname":"MAG Identifier","schemeid":null,"schemename":null},"value":"2112231548"}],"rank":19,"surname":"Lee"},{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"Chung-Ang University"}],"fullname":"Sung-Kwon Moon","name":"Sung-Kwon","pid":[{"dataInfo":null,"qualifier":{"classid":"MAG Identifier","classname":"MAG Identifier","schemeid":null,"schemename":null},"value":"2796689429"}],"rank":20,"surname":"Moon"},{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"Chungbuk National University"}],"fullname":"Nam-Hyung Kim","name":"Nam-Hyung","pid":[{"dataInfo":null,"qualifier":{"classid":"MAG Identifier","classname":"MAG Identifier","schemeid":null,"schemename":null},"value":"2136287741"}],"rank":21,"surname":"Kim"},{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"Rutgers University"}],"fullname":"Isaac Yi Kim","name":"Isaac Yi","pid":[{"dataInfo":null,"qualifier":{"classid":"MAG Identifier","classname":"MAG Identifier","schemeid":null,"schemename":null},"value":"2015295992"}],"rank":22,"surname":"Kim"},{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"Harvard University"}],"fullname":"Jayoung Kim","name":"Jayoung","pid":[{"dataInfo":null,"qualifier":{"classid":"MAG Identifier","classname":"MAG Identifier","schemeid":null,"schemename":null},"value":"2130848131"}],"rank":23,"surname":"Kim"},{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"Kosin University"}],"fullname":"Hee-Jae Cha","name":"Hee-Jae","pid":[{"dataInfo":null,"qualifier":{"classid":"MAG Identifier","classname":"MAG Identifier","schemeid":null,"schemename":null},"value":"2113489867"}],"rank":24,"surname":"Cha"},{"affiliation":[],"fullname":"Yung-Hyun Choi","name":"Yung-Hyun","pid":[{"dataInfo":null,"qualifier":{"classid":"MAG Identifier","classname":"MAG Identifier","schemeid":null,"schemename":null},"value":"2151282194"}],"rank":25,"surname":"Choi"},{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"Chungbuk National University"}],"fullname":"Eun-Jong Cha","name":"Eun-Jong","pid":[{"dataInfo":null,"qualifier":{"classid":"MAG Identifier","classname":"MAG Identifier","schemeid":null,"schemename":null},"value":"2109572239"}],"rank":26,"surname":"Cha"},{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"Chungbuk National University"}],"fullname":"Wun-Jae Kim","name":"Wun-Jae","pid":[{"dataInfo":null,"qualifier":{"classid":"MAG Identifier","classname":"MAG Identifier","schemeid":null,"schemename":null},"value":"2113339670"}],"rank":27,"surname":"Kim"}],"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":{"classid":"und","classname":"Undetermined","schemeid":"dnet:languages","schemename":"dnet:languages"},"country":[],"subject":[],"description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"Purpose:"}],"dateofacceptance":null,"embargoenddate":null,"resourcetype":null,"context":null,"instance":null} +{"id":"Previously, we reported the presence of virus-encoded microRNAs (miRNAs) in the urine of prostate cancer (CaP) patients. In this study, we investigated the expression of two herpes virus-encoded miRNAs in prostate tissue.","dateofcollection":"false\u0004\u0004false\u0004false\u0004\u0005\u0005\u0005\u0004\u00032016-6-30","title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":null,"inferred":null,"invisible":null,"provenanceaction":null,"trust":null},"qualifier":{"classid":"","classname":null,"schemeid":null,"schemename":null},"value":"false"},{"dataInfo":{"deletedbyinference":null,"inferenceprovenance":null,"inferred":null,"invisible":null,"provenanceaction":null,"trust":null},"qualifier":null,"value":null}],"publisher":{"dataInfo":{"deletedbyinference":null,"inferenceprovenance":null,"inferred":null,"invisible":null,"provenanceaction":null,"trust":null},"value":""},"bestaccessright":{"classid":"","classname":null,"schemeid":null,"schemename":null},"dataInfo":{"deletedbyinference":null,"inferenceprovenance":"UNKNOWN\u0005not available\u0005dnet:access_modes\u0005dnet:access_modes\u0004false\u0006\u0006false\u0006false\u0006\u0007\u0007\u0007\u0006\u000510|openaire____::081b82f96300b6a6e3d282bad31cb6e2\u0005Crossref\u0004false\u0006\u0006false\u0006false\u0006\u0007\u0007\u0007\u0006\u0005\u0004\u0004false\u0006\u0006false\u0006false\u0006\u0007\u0007\u0007\u0006\u000510|openaire____::55045bd2a65019fd8e6741a755395c8c\u0005Unknown Repository\u00040001\u0005Article\u0005dnet:publication_resource\u0005dnet:publication_resource\u0004false\u0006\u0006false\u0006false\u0006\u0007\u0007\u0007\u0006\u0005\u0004false\u0006\u0006false\u0006false\u0006\u0007\u0007\u0007\u0006\u0005\u0004false\u0006\u0006false\u0006false\u0006\u0007\u0007\u0007\u0006\u0005\u0004false\u0006\u0006false\u0006false\u0006\u0007\u0007\u0007\u0006\u0005\u0004http://einj.org/upload/pdf/inj-1632552-276.pdf","inferred":null,"invisible":null,"provenanceaction":{"classid":"UNKNOWN\u0005not available\u0005dnet:access_modes\u0005dnet:access_modes","classname":"false\u0006\u0006false\u0006false\u0006\u0007\u0007\u0007\u0006\u000510|openaire____::5f532a3fc4f1ea403f37070f59a7a53a\u0005Microsoft Academic Graph","schemeid":"false\u0006\u0006false\u0006false\u0006\u0007\u0007\u0007\u0006\u0005","schemename":""},"trust":"RESTRICTED\u0005Restricted\u0005dnet:access_modes\u0005dnet:access_modes\u0004false\u0006\u0006false\u0006false\u0006\u0007\u0007\u0007\u0006\u000510|openaire____::081b82f96300b6a6e3d282bad31cb6e2\u0005Crossref\u0004false\u0006\u0006false\u0006false\u0006\u0007\u0007\u0007\u0006\u0005\u0004\u0004false\u0006\u0006false\u0006false\u0006\u0007\u0007\u0007\u0006\u000510|doajarticles::52db9a4f8e176f6e8e1d9f0c1e0a2de0\u0005International Neurourology Journal\u00040001\u0005Article\u0005dnet:publication_resource\u0005dnet:publication_resource\u0004false\u0006\u0006false\u0006false\u0006\u0007\u0007\u0007\u0006\u0005\u0004false\u0006\u0006false\u0006false\u0006\u0007\u0007\u0007\u0006\u0005\u0004false\u0006\u0006false\u0006false\u0006\u0007\u0007\u0007\u0006\u0005\u0004false\u0006\u0006false\u0006false\u0006\u0007\u0007\u0007\u0006\u0005\u0004http://dx.doi.org/10.5213/inj.1632552.276"},"collectedfrom":null,"pid":null,"author":null,"resulttype":null,"language":null,"country":null,"subject":null,"description":null,"dateofacceptance":null,"embargoenddate":null,"resourcetype":null,"context":null,"instance":null} \ No newline at end of file From 0d1ec1913f40827b5e3fbda2575082072c9123ba Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 22 May 2020 18:42:25 +0200 Subject: [PATCH 17/37] added fix to avoid duplication of results --- .../PrepareResultCommunitySet.java | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/PrepareResultCommunitySet.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/PrepareResultCommunitySet.java index 5d0b75a8e0..750d333e53 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/PrepareResultCommunitySet.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/PrepareResultCommunitySet.java @@ -95,20 +95,20 @@ public class PrepareResultCommunitySet { result_organizationset .map(mapResultCommunityFn(organizationMap), Encoders.bean(ResultCommunityList.class)) .filter(Objects::nonNull) - .toJavaRDD() - .mapToPair(value -> new Tuple2<>(value.getResultId(), value)) - .reduceByKey((a, b) -> { - ArrayList cl = a.getCommunityList(); - b.getCommunityList().stream().forEach(s -> { - if (!cl.contains(s)) { - cl.add(s); - } - }); - a.setCommunityList(cl); - return a; - }) - .map(value -> OBJECT_MAPPER.writeValueAsString(value._2())) - .saveAsTextFile(outputPath, GzipCodec.class); + .toJavaRDD() + .mapToPair(value -> new Tuple2<>(value.getResultId(), value)) + .reduceByKey((a, b) -> { + ArrayList cl = a.getCommunityList(); + b.getCommunityList().stream().forEach(s -> { + if (!cl.contains(s)) { + cl.add(s); + } + }); + a.setCommunityList(cl); + return a; + }) + .map(value -> OBJECT_MAPPER.writeValueAsString(value._2())) + .saveAsTextFile(outputPath, GzipCodec.class); // .write() // .mode(SaveMode.Overwrite) // .option("compression", "gzip") From 7181807e644ed8db9765aa45a7b4b3ff65d49a60 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Sat, 23 May 2020 09:51:48 +0200 Subject: [PATCH 18/37] code formatting --- .../eu/dnetlib/dhp/oa/dedup/AuthorMerger.java | 25 ++++++++++--------- .../dhp/oa/dedup/EntityMergerTest.java | 2 +- 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/AuthorMerger.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/AuthorMerger.java index 5e63c4b656..ee5fd51656 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/AuthorMerger.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/AuthorMerger.java @@ -82,10 +82,11 @@ public class AuthorMerger { .map(ba -> new Tuple2<>(sim(ba, a._2()), ba)) .max(Comparator.comparing(Tuple2::_1)); - if(simAuthor.isPresent()) { + if (simAuthor.isPresent()) { double th = THRESHOLD; - //increase the threshold if the surname is too short - if (simAuthor.get()._2().getSurname() != null && simAuthor.get()._2().getSurname().length()<=3) + // increase the threshold if the surname is too short + if (simAuthor.get()._2().getSurname() != null + && simAuthor.get()._2().getSurname().length() <= 3) th = 0.99; if (simAuthor.get()._1() > th) { @@ -100,9 +101,10 @@ public class AuthorMerger { } public static String pidToComparableString(StructuredProperty pid) { - return (pid.getQualifier() != null ? - pid.getQualifier().getClassid() != null ? pid.getQualifier().getClassid().toLowerCase() : "" : "") - + (pid.getValue() != null ? pid.getValue().toLowerCase() : ""); + return (pid.getQualifier() != null + ? pid.getQualifier().getClassid() != null ? pid.getQualifier().getClassid().toLowerCase() : "" + : "") + + (pid.getValue() != null ? pid.getValue().toLowerCase() : ""); } public static int countAuthorsPids(List authors) { @@ -123,14 +125,13 @@ public class AuthorMerger { final Person pa = parse(a); final Person pb = parse(b); - //if both are accurate (e.g. they have name and surname) + // if both are accurate (e.g. they have name and surname) if (pa.isAccurate() & pb.isAccurate()) { - return - new JaroWinkler().score(normalize(pa.getSurnameString()), normalize(pb.getSurnameString()))*0.5 - + new JaroWinkler().score(normalize(pa.getNameString()), normalize(pb.getNameString()))*0.5; + return new JaroWinkler().score(normalize(pa.getSurnameString()), normalize(pb.getSurnameString())) * 0.5 + + new JaroWinkler().score(normalize(pa.getNameString()), normalize(pb.getNameString())) * 0.5; } else { - return - new JaroWinkler().score(normalize(pa.getNormalisedFullname()), normalize(pb.getNormalisedFullname())); + return new JaroWinkler() + .score(normalize(pa.getNormalisedFullname()), normalize(pb.getNormalisedFullname())); } } diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/EntityMergerTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/EntityMergerTest.java index 55879030be..144d5d49a0 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/EntityMergerTest.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/EntityMergerTest.java @@ -96,7 +96,7 @@ public class EntityMergerTest implements Serializable { public void publicationMergerTest2() throws InstantiationException, IllegalAccessException, IOException { Publication pub_merged = DedupRecordFactory - .entityMerger(dedupId, publications2.iterator(), 0, dataInfo, Publication.class); + .entityMerger(dedupId, publications2.iterator(), 0, dataInfo, Publication.class); assertEquals(pub_merged.getAuthor().size(), 27); // insert assertions here From 6b56cae57db666e79866c627fa1d95cf080c30fe Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Sat, 23 May 2020 09:57:39 +0200 Subject: [PATCH 19/37] added mapping for bestaccessrights --- .../dhp/schema/common/LicenseComparator.java | 69 +++++++++++++++++++ .../raw/AbstractMdRecordToOafMapper.java | 52 +++++++++----- .../dnetlib/dhp/oa/graph/raw/MappersTest.java | 21 +++++- 3 files changed, 124 insertions(+), 18 deletions(-) create mode 100644 dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/LicenseComparator.java diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/LicenseComparator.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/LicenseComparator.java new file mode 100644 index 0000000000..1fb8421804 --- /dev/null +++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/LicenseComparator.java @@ -0,0 +1,69 @@ + +package eu.dnetlib.dhp.schema.common; + +import eu.dnetlib.dhp.schema.oaf.Qualifier; + +import java.util.Comparator; + +public class LicenseComparator implements Comparator { + + @Override + public int compare(Qualifier left, Qualifier right) { + + if (left == null && right == null) + return 0; + if (left == null) + return 1; + if (right == null) + return -1; + + String lClass = left.getClassid(); + String rClass = right.getClassid(); + + if (lClass.equals(rClass)) + return 0; + + if (lClass.equals("OPEN SOURCE")) + return -1; + if (rClass.equals("OPEN SOURCE")) + return 1; + + if (lClass.equals("OPEN")) + return -1; + if (rClass.equals("OPEN")) + return 1; + + if (lClass.equals("6MONTHS")) + return -1; + if (rClass.equals("6MONTHS")) + return 1; + + if (lClass.equals("12MONTHS")) + return -1; + if (rClass.equals("12MONTHS")) + return 1; + + if (lClass.equals("EMBARGO")) + return -1; + if (rClass.equals("EMBARGO")) + return 1; + + if (lClass.equals("RESTRICTED")) + return -1; + if (rClass.equals("RESTRICTED")) + return 1; + + if (lClass.equals("CLOSED")) + return -1; + if (rClass.equals("CLOSED")) + return 1; + + if (lClass.equals("UNKNOWN")) + return -1; + if (rClass.equals("UNKNOWN")) + return 1; + + // Else (but unlikely), lexicographical ordering will do. + return lClass.compareTo(rClass); + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java index c4639eb44a..46fff2f425 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java @@ -10,24 +10,12 @@ import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.listFields; import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.oaiIProvenance; import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.qualifier; import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.structuredProperty; -import static eu.dnetlib.dhp.schema.common.ModelConstants.DATASET_DEFAULT_RESULTTYPE; -import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_PID_TYPES; -import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_PRODUCED_BY; -import static eu.dnetlib.dhp.schema.common.ModelConstants.ORP_DEFAULT_RESULTTYPE; -import static eu.dnetlib.dhp.schema.common.ModelConstants.OUTCOME; -import static eu.dnetlib.dhp.schema.common.ModelConstants.PRODUCES; -import static eu.dnetlib.dhp.schema.common.ModelConstants.PUBLICATION_DEFAULT_RESULTTYPE; -import static eu.dnetlib.dhp.schema.common.ModelConstants.REPOSITORY_PROVENANCE_ACTIONS; -import static eu.dnetlib.dhp.schema.common.ModelConstants.RESULT_PROJECT; -import static eu.dnetlib.dhp.schema.common.ModelConstants.SOFTWARE_DEFAULT_RESULTTYPE; +import static eu.dnetlib.dhp.schema.common.ModelConstants.*; +import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_ACCESS_MODES; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Date; -import java.util.HashMap; -import java.util.List; -import java.util.Map; +import java.util.*; +import eu.dnetlib.dhp.schema.common.LicenseComparator; import org.apache.commons.lang3.StringUtils; import org.dom4j.Document; import org.dom4j.DocumentFactory; @@ -285,7 +273,9 @@ public abstract class AbstractMdRecordToOafMapper { r.setCoverage(prepareCoverages(doc, info)); r.setContext(prepareContexts(doc, info)); r.setExternalReference(new ArrayList<>()); // NOT PRESENT IN MDSTORES - r.setInstance(prepareInstances(doc, info, collectedFrom, hostedBy)); + final List instances = prepareInstances(doc, info, collectedFrom, hostedBy); + r.setInstance(instances); + r.setBestaccessright(getBestAccessRights(instances)); } private List prepareContexts(final Document doc, final DataInfo info) { @@ -368,6 +358,34 @@ public abstract class AbstractMdRecordToOafMapper { protected abstract Field prepareDatasetStorageDate(Document doc, DataInfo info); + protected static Qualifier getBestAccessRights(List instanceList) { + if (instanceList != null) { + final Optional min = instanceList + .stream() + .map(i -> i.getAccessright()) + .min(new LicenseComparator()); + + final Qualifier rights = min.isPresent() ? min.get() : new Qualifier(); + + if (StringUtils.isBlank(rights.getClassid())) { + rights.setClassid(UNKNOWN); + } + if (StringUtils.isBlank(rights.getClassname()) + || UNKNOWN.equalsIgnoreCase(rights.getClassname())) { + rights.setClassname(NOT_AVAILABLE); + } + if (StringUtils.isBlank(rights.getSchemeid())) { + rights.setSchemeid(DNET_ACCESS_MODES); + } + if (StringUtils.isBlank(rights.getSchemename())) { + rights.setSchemename(DNET_ACCESS_MODES); + } + + return rights; + } + return null; + } + private Journal prepareJournal(final Document doc, final DataInfo info) { final Node n = doc.selectSingleNode("//oaf:journal"); if (n != null) { diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java index 631e7235ec..e9ee2dabe9 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java @@ -85,8 +85,18 @@ public class MappersTest { assertTrue(p.getSubject().size() > 0); assertTrue(StringUtils.isNotBlank(p.getJournal().getIssnOnline())); assertTrue(StringUtils.isNotBlank(p.getJournal().getName())); - assertTrue(p.getInstance().size() > 0); + assertNotNull(p.getInstance()); + assertTrue(p.getInstance().size() > 0); + p.getInstance() + .stream() + .forEach(i -> { + assertNotNull(i.getAccessright()); + assertEquals("OPEN", i.getAccessright().getClassid()); + }); + + assertNotNull(p.getBestaccessright()); + assertEquals("OPEN", p.getBestaccessright().getClassid()); assertValidId(r1.getSource()); assertValidId(r1.getTarget()); assertValidId(r2.getSource()); @@ -164,6 +174,15 @@ public class MappersTest { assertTrue(d.getContext().size() > 0); assertTrue(d.getContext().get(0).getId().length() > 0); + assertNotNull(d.getInstance()); + assertTrue(d.getInstance().size() > 0); + d.getInstance() + .stream() + .forEach(i -> { + assertNotNull(i.getAccessright()); + assertEquals("OPEN", i.getAccessright().getClassid()); + }); + assertValidId(r1.getSource()); assertValidId(r1.getTarget()); assertValidId(r2.getSource()); From de108f54d6f3ac33a57db8d5737d10557d9637d2 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Sat, 23 May 2020 10:21:19 +0200 Subject: [PATCH 20/37] code formatting --- .../dhp/schema/common/LicenseComparator.java | 4 +-- .../raw/AbstractMdRecordToOafMapper.java | 10 +++---- .../dnetlib/dhp/oa/graph/raw/MappersTest.java | 26 ++++++++++--------- 3 files changed, 21 insertions(+), 19 deletions(-) diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/LicenseComparator.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/LicenseComparator.java index 1fb8421804..db523ad1ac 100644 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/LicenseComparator.java +++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/LicenseComparator.java @@ -1,10 +1,10 @@ package eu.dnetlib.dhp.schema.common; -import eu.dnetlib.dhp.schema.oaf.Qualifier; - import java.util.Comparator; +import eu.dnetlib.dhp.schema.oaf.Qualifier; + public class LicenseComparator implements Comparator { @Override diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java index 46fff2f425..5c89d5096f 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java @@ -15,13 +15,13 @@ import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_ACCESS_MODES; import java.util.*; -import eu.dnetlib.dhp.schema.common.LicenseComparator; import org.apache.commons.lang3.StringUtils; import org.dom4j.Document; import org.dom4j.DocumentFactory; import org.dom4j.DocumentHelper; import org.dom4j.Node; +import eu.dnetlib.dhp.schema.common.LicenseComparator; import eu.dnetlib.dhp.schema.oaf.Author; import eu.dnetlib.dhp.schema.oaf.Context; import eu.dnetlib.dhp.schema.oaf.DataInfo; @@ -361,9 +361,9 @@ public abstract class AbstractMdRecordToOafMapper { protected static Qualifier getBestAccessRights(List instanceList) { if (instanceList != null) { final Optional min = instanceList - .stream() - .map(i -> i.getAccessright()) - .min(new LicenseComparator()); + .stream() + .map(i -> i.getAccessright()) + .min(new LicenseComparator()); final Qualifier rights = min.isPresent() ? min.get() : new Qualifier(); @@ -371,7 +371,7 @@ public abstract class AbstractMdRecordToOafMapper { rights.setClassid(UNKNOWN); } if (StringUtils.isBlank(rights.getClassname()) - || UNKNOWN.equalsIgnoreCase(rights.getClassname())) { + || UNKNOWN.equalsIgnoreCase(rights.getClassname())) { rights.setClassname(NOT_AVAILABLE); } if (StringUtils.isBlank(rights.getSchemeid())) { diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java index e9ee2dabe9..b9da9fb297 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java @@ -88,12 +88,13 @@ public class MappersTest { assertNotNull(p.getInstance()); assertTrue(p.getInstance().size() > 0); - p.getInstance() - .stream() - .forEach(i -> { - assertNotNull(i.getAccessright()); - assertEquals("OPEN", i.getAccessright().getClassid()); - }); + p + .getInstance() + .stream() + .forEach(i -> { + assertNotNull(i.getAccessright()); + assertEquals("OPEN", i.getAccessright().getClassid()); + }); assertNotNull(p.getBestaccessright()); assertEquals("OPEN", p.getBestaccessright().getClassid()); @@ -176,12 +177,13 @@ public class MappersTest { assertNotNull(d.getInstance()); assertTrue(d.getInstance().size() > 0); - d.getInstance() - .stream() - .forEach(i -> { - assertNotNull(i.getAccessright()); - assertEquals("OPEN", i.getAccessright().getClassid()); - }); + d + .getInstance() + .stream() + .forEach(i -> { + assertNotNull(i.getAccessright()); + assertEquals("OPEN", i.getAccessright().getClassid()); + }); assertValidId(r1.getSource()); assertValidId(r1.getTarget()); From 0ab0206b4d42452a99625043ddecdc75f474d4df Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Mon, 25 May 2020 10:11:41 +0200 Subject: [PATCH 21/37] removed null objects from flattened Field in mergeLists --- .../src/main/java/eu/dnetlib/dhp/schema/oaf/OafEntity.java | 1 + 1 file changed, 1 insertion(+) diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/OafEntity.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/OafEntity.java index 09742748d7..2823ee49dd 100644 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/OafEntity.java +++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/OafEntity.java @@ -106,6 +106,7 @@ public abstract class OafEntity extends Oaf implements Serializable { .stream(lists) .filter(Objects::nonNull) .flatMap(List::stream) + .filter(Objects::nonNull) .distinct() .collect(Collectors.toList()); } From 4b34872b44701581d5378c13765483e6d42fd77f Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Mon, 25 May 2020 10:14:15 +0200 Subject: [PATCH 22/37] using Objects.equals to check Field equivalence --- dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Field.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Field.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Field.java index 1a85c6842d..8358bc4b37 100644 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Field.java +++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Field.java @@ -2,6 +2,7 @@ package eu.dnetlib.dhp.schema.oaf; import java.io.Serializable; +import java.util.Objects; public class Field implements Serializable { @@ -39,6 +40,6 @@ public class Field implements Serializable { if (getClass() != obj.getClass()) return false; Field other = (Field) obj; - return getValue().equals(other.getValue()); + return Objects.equals(getValue(), other.getValue()); } } From 8f6ce970f9e527d30f30cf69882cfc67505d70bf Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 25 May 2020 10:25:55 +0200 Subject: [PATCH 23/37] moved PacePerson to dhp-common to avoid conflict in dependency with graph-mapper --- .../src/main/java/eu/dnetlib/dhp}/common/PacePerson.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename {dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw => dhp-common/src/main/java/eu/dnetlib/dhp}/common/PacePerson.java (99%) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/PacePerson.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/PacePerson.java similarity index 99% rename from dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/PacePerson.java rename to dhp-common/src/main/java/eu/dnetlib/dhp/common/PacePerson.java index 6e474f2f38..1909ddcca6 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/PacePerson.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/PacePerson.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.oa.graph.raw.common; +package eu.dnetlib.dhp.common; import java.nio.charset.StandardCharsets; import java.text.Normalizer; From b258f99ece5ab9ff2ffce961dc91d18383690947 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 25 May 2020 10:26:48 +0200 Subject: [PATCH 24/37] fix for issue that duplicated result --- .../PrepareDatasourceCountryAssociation.java | 65 ++++++++++++------- .../PrepareResultCountrySet.java | 38 +++++++++-- 2 files changed, 76 insertions(+), 27 deletions(-) diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java index 98b573102a..f28c5aa06d 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java @@ -77,9 +77,15 @@ public class PrepareDatasourceCountryAssociation { List allowedtypes, String inputPath, String outputPath) { - String whitelisted = ""; - for (String i : whitelist) { - whitelisted += " OR id = '" + i + "'"; + String whitelisted = " d.id = '" + whitelist.get(0) + "'"; + for (int i = 1; i < whitelist.size(); i++) { + whitelisted += " OR d.id = '" + whitelist.get(i) + "'"; + } + + String allowed = "d.datasourcetype.classid = '" + allowedtypes.get(0) + "'"; + + for (int i = 1; i < allowedtypes.size(); i++) { + allowed += " OR d.datasourcetype.classid = '" + allowedtypes.get(i) + "'"; } Dataset datasource = readPath(spark, inputPath + "/datasource", Datasource.class); @@ -90,26 +96,39 @@ public class PrepareDatasourceCountryAssociation { relation.createOrReplaceTempView("relation"); organization.createOrReplaceTempView("organization"); - String query = "SELECT source dataSourceId, named_struct('classid', country.classid, 'classname', country.classname) country " - + "FROM ( SELECT id " - + " FROM datasource " - + " WHERE (datainfo.deletedbyinference = false " - + whitelisted - + ") " - + getConstraintList("datasourcetype.classid = '", allowedtypes) - + ") d " - + "JOIN ( SELECT source, target " - + " FROM relation " - + " WHERE relclass = '" - + ModelConstants.IS_PROVIDED_BY - + "' " - + " AND datainfo.deletedbyinference = false ) rel " - + "ON d.id = rel.source " - + "JOIN (SELECT id, country " - + " FROM organization " - + " WHERE datainfo.deletedbyinference = false " - + " AND length(country.classid) > 0) o " - + "ON o.id = rel.target"; +// String query = "SELECT source dataSourceId, named_struct('classid', country.classid, 'classname', country.classname) country " +// + "FROM ( SELECT id " +// + " FROM datasource " +// + " WHERE (datainfo.deletedbyinference = false " +// + whitelisted +// + ") " +// + getConstraintList("datasourcetype.classid = '", allowedtypes) +// + ") d " +// + "JOIN ( SELECT source, target " +// + " FROM relation " +// + " WHERE relclass = '" +// + ModelConstants.IS_PROVIDED_BY +// + "' " +// + " AND datainfo.deletedbyinference = false ) rel " +// + "ON d.id = rel.source " +// + "JOIN (SELECT id, country " +// + " FROM organization " +// + " WHERE datainfo.deletedbyinference = false " +// + " AND length(country.classid) > 0) o " +// + "ON o.id = rel.target"; + + String query = "SELECT source dataSourceId, " + + "named_struct('classid', country.classid, 'classname', country.classname) country " + + "FROM datasource d " + + "JOIN relation rel " + + "ON d.id = rel.source " + + "JOIN organization o " + + "ON o.id = rel.target " + + "WHERE rel.datainfo.deletedbyinference = false " + + "and rel.relclass = '" + ModelConstants.IS_PROVIDED_BY + "'" + + "and o.datainfo.deletedbyinference = false " + + "and length(o.country.classid) > 0 " + + "and (" + allowed + " or " + whitelisted + ")"; spark .sql(query) diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareResultCountrySet.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareResultCountrySet.java index 34b3764131..8d0d6c48b9 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareResultCountrySet.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareResultCountrySet.java @@ -4,7 +4,12 @@ package eu.dnetlib.dhp.countrypropagation; import static eu.dnetlib.dhp.PropagationConstant.*; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; +import java.util.ArrayList; +import java.util.Set; +import java.util.stream.Collectors; + import org.apache.commons.io.IOUtils; +import org.apache.hadoop.io.compress.GzipCodec; import org.apache.spark.SparkConf; import org.apache.spark.sql.*; import org.apache.spark.sql.Dataset; @@ -13,6 +18,7 @@ import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.schema.oaf.*; +import scala.Tuple2; public class PrepareResultCountrySet { private static final Logger log = LoggerFactory.getLogger(PrepareResultCountrySet.class); @@ -60,6 +66,7 @@ public class PrepareResultCountrySet { conf, isSparkSessionManaged, spark -> { + removeOutputDir(spark, outputPath); getPotentialResultToUpdate( spark, inputPath, @@ -89,10 +96,33 @@ public class PrepareResultCountrySet { spark .sql(RESULT_COUNTRYSET_QUERY) .as(Encoders.bean(ResultCountrySet.class)) - .write() - .option("compression", "gzip") - .mode(SaveMode.Append) - .json(outputPath); + .toJavaRDD() + .mapToPair(value -> new Tuple2<>(value.getResultId(), value)) + .reduceByKey((a, b) -> { + ArrayList countryList = a.getCountrySet(); + Set countryCodes = countryList + .stream() + .map(country -> country.getClassid()) + .collect(Collectors.toSet()); + b + .getCountrySet() + .stream() + .forEach(c -> { + if (!countryCodes.contains(c.getClassid())) { + countryList.add(c); + countryCodes.add(c.getClassid()); + } + + }); + a.setCountrySet(countryList); + return a; + }) + .map(couple -> OBJECT_MAPPER.writeValueAsString(couple._2())) + .saveAsTextFile(outputPath, GzipCodec.class); +// .write() +// .option("compression", "gzip") +// .mode(SaveMode.Append) +// .json(outputPath); } } From 8f51af4e9bb965af3994b607d0288d0ba7bb31d4 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 25 May 2020 10:34:30 +0200 Subject: [PATCH 25/37] added PacePerson to get name surname for authors having only fullname set --- .../SparkOrcidToResultFromSemRelJob.java | 98 +++++++++++-------- 1 file changed, 56 insertions(+), 42 deletions(-) diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java index bea847ca76..e4ffc56982 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java @@ -7,6 +7,7 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; import java.util.List; import java.util.Optional; +import eu.dnetlib.dhp.common.PacePerson; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; import org.apache.spark.SparkConf; @@ -129,61 +130,74 @@ public class SparkOrcidToResultFromSemRelJob { } private static boolean enrichAuthor(AutoritativeAuthor autoritative_author, Author author) { - boolean toaddpid = false; + boolean toaddpid = false; - if (StringUtils.isNotEmpty(autoritative_author.getSurname())) { - if (StringUtils.isNotEmpty(author.getSurname())) { - if (autoritative_author - .getSurname() - .trim() - .equalsIgnoreCase(author.getSurname().trim())) { + String author_name = author.getName(); + String author_surname = author.getSurname(); - // have the same surname. Check the name - if (StringUtils.isNotEmpty(autoritative_author.getName())) { - if (StringUtils.isNotEmpty(author.getName())) { - if (autoritative_author - .getName() - .trim() - .equalsIgnoreCase(author.getName().trim())) { - toaddpid = true; - } - // they could be differently written (i.e. only the initials of the name - // in one of the two - else { + if(StringUtils.isEmpty(author_name) || StringUtils.isEmpty(author_surname)){ + PacePerson pp = new PacePerson(author.getFullname(), false); + if (pp.isAccurate()){ + author_name = pp.getNormalisedFirstName(); + author_surname = pp.getNormalisedSurname(); + + } + } + + if (StringUtils.isNotEmpty(autoritative_author.getSurname())) { + if (StringUtils.isNotEmpty(author_surname)) { + if (autoritative_author + .getSurname() + .trim() + .equalsIgnoreCase(author_surname.trim())) { + + // have the same surname. Check the name + if (StringUtils.isNotEmpty(autoritative_author.getName())) { + if (StringUtils.isNotEmpty(author_name)) { if (autoritative_author - .getName() - .trim() - .substring(0, 0) - .equalsIgnoreCase(author.getName().trim().substring(0, 0))) { + .getName() + .trim() + .equalsIgnoreCase(author_name.trim())) { toaddpid = true; } + // they could be differently written (i.e. only the initials of the name + // in one of the two + else { + if (autoritative_author + .getName() + .trim() + .substring(0, 0) + .equalsIgnoreCase(author_name.trim().substring(0, 0))) { + toaddpid = true; + } + } } } } } } - } - if (toaddpid) { - StructuredProperty p = new StructuredProperty(); - p.setValue(autoritative_author.getOrcid()); - p.setQualifier(getQualifier(PROPAGATION_AUTHOR_PID, PROPAGATION_AUTHOR_PID)); - p - .setDataInfo( - getDataInfo( - PROPAGATION_DATA_INFO_TYPE, - PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_ID, - PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_NAME)); + if (toaddpid) { + StructuredProperty p = new StructuredProperty(); + p.setValue(autoritative_author.getOrcid()); + p.setQualifier(getQualifier(PROPAGATION_AUTHOR_PID, PROPAGATION_AUTHOR_PID)); + p + .setDataInfo( + getDataInfo( + PROPAGATION_DATA_INFO_TYPE, + PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_ID, + PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_NAME)); + + Optional> authorPid = Optional.ofNullable(author.getPid()); + if (authorPid.isPresent()) { + authorPid.get().add(p); + } else { + author.setPid(Lists.newArrayList(p)); + } - Optional> authorPid = Optional.ofNullable(author.getPid()); - if (authorPid.isPresent()) { - authorPid.get().add(p); - } else { - author.setPid(Lists.newArrayList(p)); } - + return toaddpid; } - return toaddpid; - } + private static boolean containsAllowedPid(Author a) { Optional> pids = Optional.ofNullable(a.getPid()); From f754c424bd87aaf6d403e3734a281288b584de83 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 25 May 2020 10:35:02 +0200 Subject: [PATCH 26/37] changed logic to compute only onece PacePerson for each Author to be enriched --- .../SparkOrcidToResultFromSemRelJob.java | 24 +++++++++---------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java index e4ffc56982..34882b5872 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java @@ -122,29 +122,24 @@ public class SparkOrcidToResultFromSemRelJob { } private static void enrichAuthor(Author a, List au) { + PacePerson pp = new PacePerson(a.getFullname(), false); for (AutoritativeAuthor aa : au) { - if (enrichAuthor(aa, a)) { + if (enrichAuthor(aa, a, pp.getNormalisedFirstName(), pp.getNormalisedSurname() )) { return; } } } - private static boolean enrichAuthor(AutoritativeAuthor autoritative_author, Author author) { + private static boolean enrichAuthor(AutoritativeAuthor autoritative_author, Author author, + String author_name, + String author_surname) { boolean toaddpid = false; - String author_name = author.getName(); - String author_surname = author.getSurname(); - - if(StringUtils.isEmpty(author_name) || StringUtils.isEmpty(author_surname)){ - PacePerson pp = new PacePerson(author.getFullname(), false); - if (pp.isAccurate()){ - author_name = pp.getNormalisedFirstName(); - author_surname = pp.getNormalisedSurname(); - - } - } if (StringUtils.isNotEmpty(autoritative_author.getSurname())) { + if (StringUtils.isNotEmpty(author.getSurname())){ + author_surname = author.getSurname(); + } if (StringUtils.isNotEmpty(author_surname)) { if (autoritative_author .getSurname() @@ -153,6 +148,9 @@ public class SparkOrcidToResultFromSemRelJob { // have the same surname. Check the name if (StringUtils.isNotEmpty(autoritative_author.getName())) { + if(StringUtils.isNotEmpty(author.getName())){ + author_name = author.getName(); + } if (StringUtils.isNotEmpty(author_name)) { if (autoritative_author .getName() From dbde2d243a21c218a22a315af785236f9d56658f Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 25 May 2020 10:35:39 +0200 Subject: [PATCH 27/37] changed due to move of PacePerson from dhp-graph-mapper to dhp-common --- .../main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java | 2 +- .../main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java index af9fe7197e..53c0913c23 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java @@ -23,7 +23,7 @@ import org.dom4j.Node; import com.google.common.collect.Lists; -import eu.dnetlib.dhp.oa.graph.raw.common.PacePerson; +import eu.dnetlib.dhp.common.PacePerson; import eu.dnetlib.dhp.schema.oaf.Author; import eu.dnetlib.dhp.schema.oaf.DataInfo; import eu.dnetlib.dhp.schema.oaf.Field; diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java index 9c74c4a938..b9a1599174 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java @@ -28,7 +28,7 @@ import org.apache.commons.lang3.StringUtils; import org.dom4j.Document; import org.dom4j.Node; -import eu.dnetlib.dhp.oa.graph.raw.common.PacePerson; +import eu.dnetlib.dhp.common.PacePerson; import eu.dnetlib.dhp.schema.oaf.Author; import eu.dnetlib.dhp.schema.oaf.DataInfo; import eu.dnetlib.dhp.schema.oaf.Field; From 74215f6d9f2ae3ea9a06f7e66b10be25a2f08568 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 25 May 2020 10:38:16 +0200 Subject: [PATCH 28/37] refactoring --- .../projecttoresult/SparkResultToProjectThroughSemRelJob.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/SparkResultToProjectThroughSemRelJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/SparkResultToProjectThroughSemRelJob.java index 17f6a057d8..0791fd68ce 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/SparkResultToProjectThroughSemRelJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/SparkResultToProjectThroughSemRelJob.java @@ -105,7 +105,7 @@ public class SparkResultToProjectThroughSemRelJob { .stream() .forEach( (p -> { - potential_update.getProjectSet().remove(p); + potential_update.getProjectSet().remove(p); })); } String resId = potential_update.getResultId(); From da1e5cf55703dd318f33cae1b780111a3179f67d Mon Sep 17 00:00:00 2001 From: miconis Date: Mon, 25 May 2020 18:02:57 +0200 Subject: [PATCH 29/37] implementation of the result title merge. main title with higher trust, distinct between the others --- .../eu/dnetlib/dhp/schema/oaf/Result.java | 28 +++++++++++++++++++ .../dhp/oa/dedup/EntityMergerTest.java | 10 ++++++- .../dhp/dedup/json/publication_merge.json | 2 +- 3 files changed, 38 insertions(+), 2 deletions(-) diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Result.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Result.java index 711b1ca681..213a585a8c 100644 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Result.java +++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Result.java @@ -244,7 +244,25 @@ public class Result extends OafEntity implements Serializable { subject = mergeLists(subject, r.getSubject()); + //merge title lists: main title with higher trust and distinct between the others + StructuredProperty baseMainTitle = null; + if(title != null) { + baseMainTitle = getMainTitle(title); + title.remove(baseMainTitle); + } + + StructuredProperty newMainTitle = null; + if(r.getTitle() != null) { + newMainTitle = getMainTitle(r.getTitle()); + r.getTitle().remove(newMainTitle); + } + + if (newMainTitle != null && compareTrust(this, r) < 0 ) + baseMainTitle = newMainTitle; + title = mergeLists(title, r.getTitle()); + if (title != null && baseMainTitle != null) + title.add(baseMainTitle); relevantdate = mergeLists(relevantdate, r.getRelevantdate()); @@ -294,4 +312,14 @@ public class Result extends OafEntity implements Serializable { } return a.size() > b.size() ? a : b; } + + private StructuredProperty getMainTitle(List titles) { + //need to check if the list of titles contains more than 1 main title? (in that case, we should chose which main title select in the list) + for (StructuredProperty title: titles) { + if (title.getQualifier() != null && title.getQualifier().getClassid() != null) + if (title.getQualifier().getClassid().equals("main title")) + return title; + } + return null; + } } diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/EntityMergerTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/EntityMergerTest.java index 144d5d49a0..f4b2c22522 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/EntityMergerTest.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/EntityMergerTest.java @@ -90,10 +90,18 @@ public class EntityMergerTest implements Serializable { // verify authors assertEquals(pub_merged.getAuthor().size(), 9); assertEquals(AuthorMerger.countAuthorsPids(pub_merged.getAuthor()), 4); + + //verify title + int count = 0; + for (StructuredProperty title: pub_merged.getTitle()){ + if (title.getQualifier().getClassid().equals("main title")) + count++; + } + assertEquals(count, 1); } @Test - public void publicationMergerTest2() throws InstantiationException, IllegalAccessException, IOException { + public void publicationMergerTest2() throws InstantiationException, IllegalAccessException { Publication pub_merged = DedupRecordFactory .entityMerger(dedupId, publications2.iterator(), 0, dataInfo, Publication.class); diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/json/publication_merge.json b/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/json/publication_merge.json index 015f9294a3..28548c5322 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/json/publication_merge.json +++ b/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/json/publication_merge.json @@ -1,3 +1,3 @@ {"context": [], "dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:datasetarchive", "classname": "sysimport:crosswalk:datasetarchive", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "dedup-similarity-result-levenstein", "invisible": false, "trust": "0.95"}, "resourcetype": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "pid": [], "contributor": [], "resulttype": {"classid": "publication", "classname": "publication", "schemename": "dnet:result_typologies", "schemeid": "dnet:result_typologies"}, "relevantdate": [], "collectedfrom": [{"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "Journal.fi", "key": "10|openaire____::6eef8049d0feedc089ee009abca55e35"}], "id": "50|a89337edbe55::4930db9e954866d70916cbfba9f81f97", "subject": [], "instance": [{"refereed": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "hostedby": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "Journal.fi", "key": "10|openaire____::6eef8049d0feedc089ee009abca55e35"}, "processingchargeamount": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "license": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "url": [], "distributionlocation": "", "processingchargecurrency": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "dateofacceptance": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "2016-01-01"}, "collectedfrom": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "Journal.fi", "key": "10|openaire____::6eef8049d0feedc089ee009abca55e35"}, "accessright": {"classid": "OPEN", "classname": "Open Access", "schemename": "dnet:access_modes", "schemeid": "dnet:access_modes"}, "instancetype": {"classid": "0001", "classname": "Article", "schemename": "dnet:dataCite_resource", "schemeid": "dnet:dataCite_resource"}}], "embargoenddate": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "lastupdatetimestamp": 0, "author": [{"surname": "Go\\u0308tz", "name": "Julia", "pid": [], "rank": 5, "affiliation": [], "fullname": "G\\u00f6tz, Julia"}, {"surname": "Wolff", "name": "Stephan", "pid": [], "rank": 6, "affiliation": [], "fullname": "Wolff, Stephan"}, {"surname": "Jansen", "name": "Olav", "pid": [], "rank": 7, "affiliation": [], "fullname": "Jansen, Olav"}, {"surname": "Dressler", "name": "Dirk", "pid": [{"qualifier": {"classid": "ORCID", "classname": "ORCID"}, "value": "0000-0000-0656-9999"},{"qualifier": {"classid": "id", "classname": "id"}, "value": "987654321"}], "rank": 8, "affiliation": [], "fullname": "Dressler, Dirk"}, {"surname": "Schneider", "name": "Susanne A.", "pid": [], "rank": 9, "affiliation": [], "fullname": "Schneider, Susanne A."}], "source": [], "dateofcollection": "2019-11-05T14:49:22.351Z", "fulltext": [], "dateoftransformation": "2019-11-05T16:10:58.988Z", "description": [], "format": [], "journal": {"issnPrinted": "1459-6067", "conferencedate": "", "conferenceplace": "", "name": "Agricultural and Food Science", "edition": "", "iss": "3", "sp": "", "vol": "27", "dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "issnOnline": "1795-1895", "ep": "", "issnLinking": ""}, "coverage": [], "publisher": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "language": {"classid": "eng", "classname": "English", "schemename": "dnet:languages", "schemeid": "dnet:languages"}, "bestaccessright": {"classid": "OPEN", "classname": "Open Access", "schemename": "dnet:access_modes", "schemeid": "dnet:access_modes"}, "country": [], "extraInfo": [], "originalId": [], "dateofacceptance": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "2018-09-30"}, "title": [{"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "qualifier": {"classid": "main title", "classname": "main title", "schemename": "dnet:dataCite_title", "schemeid": "dnet:dataCite_title"}, "value": "Altered brain activation in a reversal learning task unmasks adaptive changes in cognitive control in writer's cramp"}]} {"context": [], "dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:repository", "classname": "sysimport:crosswalk:repository", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": true, "inferenceprovenance": "dedup-similarity-result-levenstein", "invisible": false, "trust": "0.9"}, "resourcetype": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "pid": [{"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "qualifier": {"classid": "doi", "classname": "doi", "schemename": "dnet:pid_types", "schemeid": "dnet:pid_types"}, "value": "10.1016/j.nicl.2015.11.006"}], "contributor": [], "resulttype": {"classid": "publication", "classname": "publication", "schemename": "dnet:result_typologies", "schemeid": "dnet:result_typologies"}, "relevantdate": [], "collectedfrom": [{"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "BASE (Open Access Aggregator)", "key": "10|openaire____::df45502607927471ecf8a6ae83683ff5"}], "id": "50|base_oa_____::0968af610a356656706657e4f234b340", "subject": [], "instance": [{"refereed": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "hostedby": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "NeuroImage: Clinical", "key": "10|doajarticles::0c0e74daa5d95504eade9c81ebbd5b8a"}, "processingchargeamount": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "license": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "http://creativecommons.org/licenses/by-nc-nd/4.0/"}, "url": ["http://dx.doi.org/10.1016/j.nicl.2015.11.006"], "distributionlocation": "", "processingchargecurrency": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "dateofacceptance": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "2016-01-01"}, "collectedfrom": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "BASE (Open Access Aggregator)", "key": "10|openaire____::df45502607927471ecf8a6ae83683ff5"}, "accessright": {"classid": "OPEN", "classname": "Open Access", "schemename": "dnet:access_modes", "schemeid": "dnet:access_modes"}, "instancetype": {"classid": "0001", "classname": "Article", "schemename": "dnet:publication_resource", "schemeid": "dnet:publication_resource"}}], "embargoenddate": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "lastupdatetimestamp": 0, "author": [{"surname": "Zeuner", "name": "Kirsten E.", "pid": [], "rank": 1, "affiliation": [], "fullname": "Zeuner, Kirsten E."}, {"surname": "Knutzen", "name": "Arne", "pid": [], "rank": 2, "affiliation": [], "fullname": "Knutzen, Arne"}, {"surname": "Granert", "name": "Oliver", "pid": [{"qualifier": {"classid": "ORCID", "classname": "ORCID"}, "value": "0000-0002-0656-1023"}], "rank": 3, "affiliation": [], "fullname": "Granert, Oliver"}, {"surname": "Sablowsky", "name": "Simone", "pid": [], "rank": 4, "affiliation": [], "fullname": "Sablowsky, Simone"}, {"surname": "Go\\u0308tz", "name": "Julia", "pid": [], "rank": 5, "affiliation": [], "fullname": "G\\u00f6tz, Julia"}, {"surname": "Wolff", "name": "Stephan", "pid": [], "rank": 6, "affiliation": [], "fullname": "Wolff, Stephan"}, {"surname": "Jansen", "name": "Olav", "pid": [], "rank": 7, "affiliation": [], "fullname": "Jansen, Olav"}, {"surname": "Dressler", "name": "Dirk", "pid": [], "rank": 8, "affiliation": [], "fullname": "Dressler, Dirk"}, {"surname": "Schneider", "name": "Susanne A.", "pid": [], "rank": 9, "affiliation": [], "fullname": "Schneider, Susanne A."}, {"surname": "Klein", "name": "Christine", "pid": [], "rank": 10, "affiliation": [], "fullname": "Klein, Christine"}, {"surname": "Deuschl", "name": "Gu\\u0308nther", "pid": [], "rank": 11, "affiliation": [], "fullname": "Deuschl, G\\u00fcnther"}, {"surname": "Eimeren", "name": "Thilo", "pid": [], "rank": 12, "affiliation": [], "fullname": "van Eimeren, Thilo"}, {"surname": "Witt", "name": "Karsten", "pid": [], "rank": 13, "affiliation": [], "fullname": "Witt, Karsten"}], "source": [], "dateofcollection": "2017-07-27T19:04:09.131Z", "fulltext": [], "dateoftransformation": "2019-01-23T10:15:19.582Z", "description": [], "format": [], "journal": {"issnPrinted": "2213-1582", "conferencedate": "", "conferenceplace": "", "name": "NeuroImage: Clinical", "edition": "", "iss": "", "sp": "63", "vol": "10", "dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "issnOnline": "", "ep": "70", "issnLinking": ""}, "coverage": [], "publisher": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "Elsevier BV"}, "language": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "bestaccessright": {"classid": "OPEN", "classname": "Open Access", "schemename": "dnet:access_modes", "schemeid": "dnet:access_modes"}, "country": [{"classid": "IT", "classname": "Italy", "schemeid": "dnet:countries", "schemename": "dnet:countries"}], "extraInfo": [], "originalId": ["10.1016/j.nicl.2015.11.006"], "dateofacceptance": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "2016-01-01"}, "title": [{"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "qualifier": {"classid": "main title", "classname": "main title", "schemename": "dnet:dataCite_title", "schemeid": "dnet:dataCite_title"}, "value": "Altered brain activation in a reversal learning task unmasks adaptive changes in cognitive control in writer's cramp"}]} -{"context": [], "dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:datasetarchive", "classname": "sysimport:crosswalk:datasetarchive", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": true, "inferenceprovenance": "dedup-similarity-result-levenstein", "invisible": false, "trust": "0.9"}, "resourcetype": {"classid": "0004", "classname": "Conference object", "schemename": "dnet:dataCite_resource", "schemeid": "dnet:dataCite_resource"}, "pid": [], "contributor": [], "resulttype": {"classid": "publication", "classname": "publication", "schemename": "dnet:result_typologies", "schemeid": "dnet:result_typologies"}, "relevantdate": [], "collectedfrom": [{"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "CRIS UNS (Current Research Information System University of Novi Sad)", "key": "10|CRIS_UNS____::f66f1bd369679b5b077dcdf006089556"}], "id": "50|CrisUnsNoviS::9f9d014eea45dab432cab636c4c9cf39", "subject": [], "instance": [{"refereed": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "hostedby": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "CRIS UNS (Current Research Information System University of Novi Sad)", "key": "10|CRIS_UNS____::f66f1bd369679b5b077dcdf006089556"}, "processingchargeamount": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "license": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "url": ["https://www.cris.uns.ac.rs/record.jsf?recordId=113444&source=OpenAIRE&language=en"], "distributionlocation": "", "processingchargecurrency": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "dateofacceptance": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "2019-01-01"}, "collectedfrom": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "CRIS UNS (Current Research Information System University of Novi Sad)", "key": "10|CRIS_UNS____::f66f1bd369679b5b077dcdf006089556"}, "accessright": {"classid": "UNKNOWN", "classname": "UNKNOWN", "schemename": "dnet:access_modes", "schemeid": "dnet:access_modes"}, "instancetype": {"classid": "0004", "classname": "Conference object", "schemename": "dnet:dataCite_resource", "schemeid": "dnet:dataCite_resource"}}], "embargoenddate": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "lastupdatetimestamp": 0, "author": [{"surname": "Zeuner", "name": "Kirsten E.", "pid": [], "rank": 1, "affiliation": [], "fullname": "Zeuner, Kirsten E."}, {"surname": "Knutzen", "name": "Arne", "pid": [], "rank": 2, "affiliation": [], "fullname": "Knutzen, Arne"}, {"surname": "Granert", "name": "Oliver", "pid": [{"qualifier": {"classid": "ORCID", "classname": "ORCID"}, "value": "0000-0002-0656-1023"}, {"qualifier": {"classid": "pubmed", "classname": "pubmed"}, "value": "pubmed.it"}], "rank": 3, "affiliation": [], "fullname": "Granert, Oliver"}, {"surname": "Sablowsky", "name": "Simone", "pid": [{"qualifier": {"classid": "id", "classname": "id"}, "value": "12345678"}], "rank": 4, "affiliation": [], "fullname": "Sablowsky, Simone"}, {"surname": "Go\\u0308tz", "name": "Julia", "pid": [], "rank": 5, "affiliation": [], "fullname": "G\\u00f6tz, Julia"}, {"surname": "Wolff", "name": "Stephan", "pid": [], "rank": 6, "affiliation": [], "fullname": "Wolff, Stephan"}, {"surname": "Jansen", "name": "Olav", "pid": [{"qualifier": {"classid": "ORCID", "classname": "ORCID"}, "value": "0000-0000-0656-1023"},{"qualifier": {"classid": "id", "classname": "id"}, "value": "987654321"}], "rank": 7, "affiliation": [], "fullname": "Jansen, Olav"}, {"surname": "Dressler", "name": "Dirk", "pid": [], "rank": 8, "affiliation": [], "fullname": "Dressler, Dirk"}, {"surname": "Schneider", "name": "Susanne A.", "pid": [], "rank": 9, "affiliation": [], "fullname": "Schneider, Susanne A."}], "source": [], "dateofcollection": "2020-03-10T15:05:38.685Z", "fulltext": [], "dateoftransformation": "2020-03-11T20:11:13.15Z", "description": [], "format": [], "journal": {"issnPrinted": "", "conferencedate": "", "conferenceplace": "", "name": "", "edition": "", "iss": "", "sp": "", "vol": "", "dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "issnOnline": "", "ep": "", "issnLinking": ""}, "coverage": [], "publisher": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "language": {"classid": "en", "classname": "en", "schemename": "dnet:languages", "schemeid": "dnet:languages"}, "bestaccessright": {"classid": "UNKNOWN", "classname": "not available", "schemename": "dnet:access_modes", "schemeid": "dnet:access_modes"}, "country": [{"classid": "FI", "classname": "Finland", "schemeid": "dnet:countries", "schemename": "dnet:countries"}], "extraInfo": [], "originalId": ["(BISIS)113444", "https://www.cris.uns.ac.rs/record.jsf?recordId=113444&source=OpenAIRE&language=en"], "dateofacceptance": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "2016-01-01"}, "title": [{"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "qualifier": {"classid": "main title", "classname": "main title", "schemename": "dnet:dataCite_title", "schemeid": "dnet:dataCite_title"}, "value": "Antichains of copies of ultrahomogeneous structures"}]} \ No newline at end of file +{"context": [], "dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:datasetarchive", "classname": "sysimport:crosswalk:datasetarchive", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": true, "inferenceprovenance": "dedup-similarity-result-levenstein", "invisible": false, "trust": "0.9"}, "resourcetype": {"classid": "0004", "classname": "Conference object", "schemename": "dnet:dataCite_resource", "schemeid": "dnet:dataCite_resource"}, "pid": [], "contributor": [], "resulttype": {"classid": "publication", "classname": "publication", "schemename": "dnet:result_typologies", "schemeid": "dnet:result_typologies"}, "relevantdate": [], "collectedfrom": [{"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "CRIS UNS (Current Research Information System University of Novi Sad)", "key": "10|CRIS_UNS____::f66f1bd369679b5b077dcdf006089556"}], "id": "50|CrisUnsNoviS::9f9d014eea45dab432cab636c4c9cf39", "subject": [], "instance": [{"refereed": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "hostedby": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "CRIS UNS (Current Research Information System University of Novi Sad)", "key": "10|CRIS_UNS____::f66f1bd369679b5b077dcdf006089556"}, "processingchargeamount": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "license": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "url": ["https://www.cris.uns.ac.rs/record.jsf?recordId=113444&source=OpenAIRE&language=en"], "distributionlocation": "", "processingchargecurrency": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "dateofacceptance": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "2019-01-01"}, "collectedfrom": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "CRIS UNS (Current Research Information System University of Novi Sad)", "key": "10|CRIS_UNS____::f66f1bd369679b5b077dcdf006089556"}, "accessright": {"classid": "UNKNOWN", "classname": "UNKNOWN", "schemename": "dnet:access_modes", "schemeid": "dnet:access_modes"}, "instancetype": {"classid": "0004", "classname": "Conference object", "schemename": "dnet:dataCite_resource", "schemeid": "dnet:dataCite_resource"}}], "embargoenddate": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "lastupdatetimestamp": 0, "author": [{"surname": "Zeuner", "name": "Kirsten E.", "pid": [], "rank": 1, "affiliation": [], "fullname": "Zeuner, Kirsten E."}, {"surname": "Knutzen", "name": "Arne", "pid": [], "rank": 2, "affiliation": [], "fullname": "Knutzen, Arne"}, {"surname": "Granert", "name": "Oliver", "pid": [{"qualifier": {"classid": "ORCID", "classname": "ORCID"}, "value": "0000-0002-0656-1023"}, {"qualifier": {"classid": "pubmed", "classname": "pubmed"}, "value": "pubmed.it"}], "rank": 3, "affiliation": [], "fullname": "Granert, Oliver"}, {"surname": "Sablowsky", "name": "Simone", "pid": [{"qualifier": {"classid": "id", "classname": "id"}, "value": "12345678"}], "rank": 4, "affiliation": [], "fullname": "Sablowsky, Simone"}, {"surname": "Go\\u0308tz", "name": "Julia", "pid": [], "rank": 5, "affiliation": [], "fullname": "G\\u00f6tz, Julia"}, {"surname": "Wolff", "name": "Stephan", "pid": [], "rank": 6, "affiliation": [], "fullname": "Wolff, Stephan"}, {"surname": "Jansen", "name": "Olav", "pid": [{"qualifier": {"classid": "ORCID", "classname": "ORCID"}, "value": "0000-0000-0656-1023"},{"qualifier": {"classid": "id", "classname": "id"}, "value": "987654321"}], "rank": 7, "affiliation": [], "fullname": "Jansen, Olav"}, {"surname": "Dressler", "name": "Dirk", "pid": [], "rank": 8, "affiliation": [], "fullname": "Dressler, Dirk"}, {"surname": "Schneider", "name": "Susanne A.", "pid": [], "rank": 9, "affiliation": [], "fullname": "Schneider, Susanne A."}], "source": [], "dateofcollection": "2020-03-10T15:05:38.685Z", "fulltext": [], "dateoftransformation": "2020-03-11T20:11:13.15Z", "description": [], "format": [], "journal": {"issnPrinted": "", "conferencedate": "", "conferenceplace": "", "name": "", "edition": "", "iss": "", "sp": "", "vol": "", "dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "issnOnline": "", "ep": "", "issnLinking": ""}, "coverage": [], "publisher": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "language": {"classid": "en", "classname": "en", "schemename": "dnet:languages", "schemeid": "dnet:languages"}, "bestaccessright": {"classid": "UNKNOWN", "classname": "not available", "schemename": "dnet:access_modes", "schemeid": "dnet:access_modes"}, "country": [{"classid": "FI", "classname": "Finland", "schemeid": "dnet:countries", "schemename": "dnet:countries"}], "extraInfo": [], "originalId": ["(BISIS)113444", "https://www.cris.uns.ac.rs/record.jsf?recordId=113444&source=OpenAIRE&language=en"], "dateofacceptance": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "2016-01-01"}, "title": [{"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "qualifier": {"classid": "test title", "classname": "test title", "schemename": "dnet:dataCite_title", "schemeid": "dnet:dataCite_title"}, "value": "Antichains of copies of ultrahomogeneous structures"}]} \ No newline at end of file From ae04234472661b4c0b2e99a710b015ba6295023f Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Mon, 25 May 2020 19:32:48 +0200 Subject: [PATCH 30/37] DataInfo.deletedbyinference is false by default --- .../src/main/java/eu/dnetlib/dhp/schema/oaf/DataInfo.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/DataInfo.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/DataInfo.java index cc77e1ea0c..9d572ee30a 100644 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/DataInfo.java +++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/DataInfo.java @@ -8,7 +8,7 @@ public class DataInfo implements Serializable { private Boolean invisible = false; private Boolean inferred; - private Boolean deletedbyinference; + private Boolean deletedbyinference = false; private String trust; private String inferenceprovenance; private Qualifier provenanceaction; From 01c2e933958a17d6a23b150d5d8fdccf371baf09 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Mon, 25 May 2020 19:48:14 +0200 Subject: [PATCH 31/37] [maven-release-plugin] prepare release dhp-1.2.1 --- dhp-build/dhp-build-assembly-resources/pom.xml | 2 +- dhp-build/dhp-build-properties-maven-plugin/pom.xml | 2 +- dhp-build/dhp-code-style/pom.xml | 2 +- dhp-build/pom.xml | 2 +- dhp-common/pom.xml | 2 +- dhp-schemas/pom.xml | 2 +- dhp-workflows/dhp-actionmanager/pom.xml | 2 +- dhp-workflows/dhp-aggregation/pom.xml | 2 +- dhp-workflows/dhp-blacklist/pom.xml | 6 ++---- dhp-workflows/dhp-broker-events/pom.xml | 2 +- dhp-workflows/dhp-dedup-openaire/pom.xml | 2 +- dhp-workflows/dhp-dedup-scholexplorer/pom.xml | 2 +- dhp-workflows/dhp-distcp/pom.xml | 2 +- dhp-workflows/dhp-enrichment/pom.xml | 6 ++---- dhp-workflows/dhp-graph-mapper/pom.xml | 2 +- dhp-workflows/dhp-graph-provision-scholexplorer/pom.xml | 2 +- dhp-workflows/dhp-graph-provision/pom.xml | 2 +- dhp-workflows/dhp-stats-update/pom.xml | 2 +- dhp-workflows/dhp-worfklow-profiles/pom.xml | 2 +- dhp-workflows/pom.xml | 2 +- pom.xml | 4 ++-- 21 files changed, 24 insertions(+), 28 deletions(-) diff --git a/dhp-build/dhp-build-assembly-resources/pom.xml b/dhp-build/dhp-build-assembly-resources/pom.xml index 327c33d6f2..0b05a71de4 100644 --- a/dhp-build/dhp-build-assembly-resources/pom.xml +++ b/dhp-build/dhp-build-assembly-resources/pom.xml @@ -6,7 +6,7 @@ eu.dnetlib.dhp dhp-build - 1.2.1-SNAPSHOT + 1.2.1 dhp-build-assembly-resources diff --git a/dhp-build/dhp-build-properties-maven-plugin/pom.xml b/dhp-build/dhp-build-properties-maven-plugin/pom.xml index 873046e080..0dab4bf361 100644 --- a/dhp-build/dhp-build-properties-maven-plugin/pom.xml +++ b/dhp-build/dhp-build-properties-maven-plugin/pom.xml @@ -6,7 +6,7 @@ eu.dnetlib.dhp dhp-build - 1.2.1-SNAPSHOT + 1.2.1 dhp-build-properties-maven-plugin diff --git a/dhp-build/dhp-code-style/pom.xml b/dhp-build/dhp-code-style/pom.xml index 8099a72e4e..3e94ba7975 100644 --- a/dhp-build/dhp-code-style/pom.xml +++ b/dhp-build/dhp-code-style/pom.xml @@ -5,7 +5,7 @@ eu.dnetlib.dhp dhp-code-style - 1.2.1-SNAPSHOT + 1.2.1 jar diff --git a/dhp-build/pom.xml b/dhp-build/pom.xml index a700a29187..6e1751ec5d 100644 --- a/dhp-build/pom.xml +++ b/dhp-build/pom.xml @@ -4,7 +4,7 @@ eu.dnetlib.dhp dhp - 1.2.1-SNAPSHOT + 1.2.1 dhp-build pom diff --git a/dhp-common/pom.xml b/dhp-common/pom.xml index c7cb11b085..0fc78c9116 100644 --- a/dhp-common/pom.xml +++ b/dhp-common/pom.xml @@ -5,7 +5,7 @@ eu.dnetlib.dhp dhp - 1.2.1-SNAPSHOT + 1.2.1 ../ diff --git a/dhp-schemas/pom.xml b/dhp-schemas/pom.xml index fe5d0c431a..1f72251fa3 100644 --- a/dhp-schemas/pom.xml +++ b/dhp-schemas/pom.xml @@ -5,7 +5,7 @@ eu.dnetlib.dhp dhp - 1.2.1-SNAPSHOT + 1.2.1 ../ diff --git a/dhp-workflows/dhp-actionmanager/pom.xml b/dhp-workflows/dhp-actionmanager/pom.xml index 22a81f7da8..0c0bbb1342 100644 --- a/dhp-workflows/dhp-actionmanager/pom.xml +++ b/dhp-workflows/dhp-actionmanager/pom.xml @@ -4,7 +4,7 @@ eu.dnetlib.dhp dhp-workflows - 1.2.1-SNAPSHOT + 1.2.1 dhp-actionmanager diff --git a/dhp-workflows/dhp-aggregation/pom.xml b/dhp-workflows/dhp-aggregation/pom.xml index 1c5465c14a..ae64a1cb85 100644 --- a/dhp-workflows/dhp-aggregation/pom.xml +++ b/dhp-workflows/dhp-aggregation/pom.xml @@ -4,7 +4,7 @@ eu.dnetlib.dhp dhp-workflows - 1.2.1-SNAPSHOT + 1.2.1 dhp-aggregation diff --git a/dhp-workflows/dhp-blacklist/pom.xml b/dhp-workflows/dhp-blacklist/pom.xml index 37abc22f61..89d7ec6a75 100644 --- a/dhp-workflows/dhp-blacklist/pom.xml +++ b/dhp-workflows/dhp-blacklist/pom.xml @@ -1,11 +1,9 @@ - + dhp-workflows eu.dnetlib.dhp - 1.2.1-SNAPSHOT + 1.2.1 4.0.0 diff --git a/dhp-workflows/dhp-broker-events/pom.xml b/dhp-workflows/dhp-broker-events/pom.xml index 8b7ec38511..c851fbd2a7 100644 --- a/dhp-workflows/dhp-broker-events/pom.xml +++ b/dhp-workflows/dhp-broker-events/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.1-SNAPSHOT + 1.2.1 4.0.0 diff --git a/dhp-workflows/dhp-dedup-openaire/pom.xml b/dhp-workflows/dhp-dedup-openaire/pom.xml index fcc356ac08..05a373507b 100644 --- a/dhp-workflows/dhp-dedup-openaire/pom.xml +++ b/dhp-workflows/dhp-dedup-openaire/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.1-SNAPSHOT + 1.2.1 4.0.0 dhp-dedup-openaire diff --git a/dhp-workflows/dhp-dedup-scholexplorer/pom.xml b/dhp-workflows/dhp-dedup-scholexplorer/pom.xml index dff376c2d2..21069f42d2 100644 --- a/dhp-workflows/dhp-dedup-scholexplorer/pom.xml +++ b/dhp-workflows/dhp-dedup-scholexplorer/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.1-SNAPSHOT + 1.2.1 4.0.0 diff --git a/dhp-workflows/dhp-distcp/pom.xml b/dhp-workflows/dhp-distcp/pom.xml index c13bec8e6d..7f15601aec 100644 --- a/dhp-workflows/dhp-distcp/pom.xml +++ b/dhp-workflows/dhp-distcp/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.1-SNAPSHOT + 1.2.1 4.0.0 diff --git a/dhp-workflows/dhp-enrichment/pom.xml b/dhp-workflows/dhp-enrichment/pom.xml index fe9833e3ea..ecf00a7bc4 100644 --- a/dhp-workflows/dhp-enrichment/pom.xml +++ b/dhp-workflows/dhp-enrichment/pom.xml @@ -1,11 +1,9 @@ - + dhp-workflows eu.dnetlib.dhp - 1.2.1-SNAPSHOT + 1.2.1 4.0.0 diff --git a/dhp-workflows/dhp-graph-mapper/pom.xml b/dhp-workflows/dhp-graph-mapper/pom.xml index 62968c4107..066aea97ac 100644 --- a/dhp-workflows/dhp-graph-mapper/pom.xml +++ b/dhp-workflows/dhp-graph-mapper/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.1-SNAPSHOT + 1.2.1 4.0.0 diff --git a/dhp-workflows/dhp-graph-provision-scholexplorer/pom.xml b/dhp-workflows/dhp-graph-provision-scholexplorer/pom.xml index e0ee03660b..533093598d 100644 --- a/dhp-workflows/dhp-graph-provision-scholexplorer/pom.xml +++ b/dhp-workflows/dhp-graph-provision-scholexplorer/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.1-SNAPSHOT + 1.2.1 4.0.0 diff --git a/dhp-workflows/dhp-graph-provision/pom.xml b/dhp-workflows/dhp-graph-provision/pom.xml index 39699b3b67..a78eab8ec9 100644 --- a/dhp-workflows/dhp-graph-provision/pom.xml +++ b/dhp-workflows/dhp-graph-provision/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.1-SNAPSHOT + 1.2.1 4.0.0 diff --git a/dhp-workflows/dhp-stats-update/pom.xml b/dhp-workflows/dhp-stats-update/pom.xml index 06408937bc..a15f3efe15 100644 --- a/dhp-workflows/dhp-stats-update/pom.xml +++ b/dhp-workflows/dhp-stats-update/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.1-SNAPSHOT + 1.2.1 4.0.0 dhp-stats-update diff --git a/dhp-workflows/dhp-worfklow-profiles/pom.xml b/dhp-workflows/dhp-worfklow-profiles/pom.xml index 5f99cdc8d3..b1c5da730d 100644 --- a/dhp-workflows/dhp-worfklow-profiles/pom.xml +++ b/dhp-workflows/dhp-worfklow-profiles/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.1-SNAPSHOT + 1.2.1 4.0.0 diff --git a/dhp-workflows/pom.xml b/dhp-workflows/pom.xml index 271c669397..fe7adf79b8 100644 --- a/dhp-workflows/pom.xml +++ b/dhp-workflows/pom.xml @@ -6,7 +6,7 @@ eu.dnetlib.dhp dhp - 1.2.1-SNAPSHOT + 1.2.1 ../ diff --git a/pom.xml b/pom.xml index 419de3540d..77675b576d 100644 --- a/pom.xml +++ b/pom.xml @@ -3,7 +3,7 @@ 4.0.0 eu.dnetlib.dhp dhp - 1.2.1-SNAPSHOT + 1.2.1 pom @@ -38,7 +38,7 @@ scm:git:gitea@code-repo.d4science.org:D-Net/dnet-hadoop.git scm:git:gitea@code-repo.d4science.org:D-Net/dnet-hadoop.git https://code-repo.d4science.org/D-Net/dnet-hadoop/ - HEAD + dhp-1.2.1 This module is the root descriptor for the dnet-hadoop project From 7582532e737d3d6e29febe3374607ce42ff13c8d Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Mon, 25 May 2020 19:48:18 +0200 Subject: [PATCH 32/37] [maven-release-plugin] prepare for next development iteration --- dhp-build/dhp-build-assembly-resources/pom.xml | 2 +- dhp-build/dhp-build-properties-maven-plugin/pom.xml | 2 +- dhp-build/dhp-code-style/pom.xml | 2 +- dhp-build/pom.xml | 2 +- dhp-common/pom.xml | 2 +- dhp-schemas/pom.xml | 2 +- dhp-workflows/dhp-actionmanager/pom.xml | 2 +- dhp-workflows/dhp-aggregation/pom.xml | 2 +- dhp-workflows/dhp-blacklist/pom.xml | 2 +- dhp-workflows/dhp-broker-events/pom.xml | 2 +- dhp-workflows/dhp-dedup-openaire/pom.xml | 2 +- dhp-workflows/dhp-dedup-scholexplorer/pom.xml | 2 +- dhp-workflows/dhp-distcp/pom.xml | 2 +- dhp-workflows/dhp-enrichment/pom.xml | 2 +- dhp-workflows/dhp-graph-mapper/pom.xml | 2 +- dhp-workflows/dhp-graph-provision-scholexplorer/pom.xml | 2 +- dhp-workflows/dhp-graph-provision/pom.xml | 2 +- dhp-workflows/dhp-stats-update/pom.xml | 2 +- dhp-workflows/dhp-worfklow-profiles/pom.xml | 2 +- dhp-workflows/pom.xml | 2 +- pom.xml | 4 ++-- 21 files changed, 22 insertions(+), 22 deletions(-) diff --git a/dhp-build/dhp-build-assembly-resources/pom.xml b/dhp-build/dhp-build-assembly-resources/pom.xml index 0b05a71de4..8bae191d38 100644 --- a/dhp-build/dhp-build-assembly-resources/pom.xml +++ b/dhp-build/dhp-build-assembly-resources/pom.xml @@ -6,7 +6,7 @@ eu.dnetlib.dhp dhp-build - 1.2.1 + 1.2.2-SNAPSHOT dhp-build-assembly-resources diff --git a/dhp-build/dhp-build-properties-maven-plugin/pom.xml b/dhp-build/dhp-build-properties-maven-plugin/pom.xml index 0dab4bf361..ad8cd57b4a 100644 --- a/dhp-build/dhp-build-properties-maven-plugin/pom.xml +++ b/dhp-build/dhp-build-properties-maven-plugin/pom.xml @@ -6,7 +6,7 @@ eu.dnetlib.dhp dhp-build - 1.2.1 + 1.2.2-SNAPSHOT dhp-build-properties-maven-plugin diff --git a/dhp-build/dhp-code-style/pom.xml b/dhp-build/dhp-code-style/pom.xml index 3e94ba7975..08f5de9ee8 100644 --- a/dhp-build/dhp-code-style/pom.xml +++ b/dhp-build/dhp-code-style/pom.xml @@ -5,7 +5,7 @@ eu.dnetlib.dhp dhp-code-style - 1.2.1 + 1.2.2-SNAPSHOT jar diff --git a/dhp-build/pom.xml b/dhp-build/pom.xml index 6e1751ec5d..369e25b24b 100644 --- a/dhp-build/pom.xml +++ b/dhp-build/pom.xml @@ -4,7 +4,7 @@ eu.dnetlib.dhp dhp - 1.2.1 + 1.2.2-SNAPSHOT dhp-build pom diff --git a/dhp-common/pom.xml b/dhp-common/pom.xml index 0fc78c9116..60e66f45a2 100644 --- a/dhp-common/pom.xml +++ b/dhp-common/pom.xml @@ -5,7 +5,7 @@ eu.dnetlib.dhp dhp - 1.2.1 + 1.2.2-SNAPSHOT ../ diff --git a/dhp-schemas/pom.xml b/dhp-schemas/pom.xml index 1f72251fa3..5e864cf940 100644 --- a/dhp-schemas/pom.xml +++ b/dhp-schemas/pom.xml @@ -5,7 +5,7 @@ eu.dnetlib.dhp dhp - 1.2.1 + 1.2.2-SNAPSHOT ../ diff --git a/dhp-workflows/dhp-actionmanager/pom.xml b/dhp-workflows/dhp-actionmanager/pom.xml index 0c0bbb1342..ec6247102e 100644 --- a/dhp-workflows/dhp-actionmanager/pom.xml +++ b/dhp-workflows/dhp-actionmanager/pom.xml @@ -4,7 +4,7 @@ eu.dnetlib.dhp dhp-workflows - 1.2.1 + 1.2.2-SNAPSHOT dhp-actionmanager diff --git a/dhp-workflows/dhp-aggregation/pom.xml b/dhp-workflows/dhp-aggregation/pom.xml index ae64a1cb85..9f082df70a 100644 --- a/dhp-workflows/dhp-aggregation/pom.xml +++ b/dhp-workflows/dhp-aggregation/pom.xml @@ -4,7 +4,7 @@ eu.dnetlib.dhp dhp-workflows - 1.2.1 + 1.2.2-SNAPSHOT dhp-aggregation diff --git a/dhp-workflows/dhp-blacklist/pom.xml b/dhp-workflows/dhp-blacklist/pom.xml index 89d7ec6a75..a3cc15b744 100644 --- a/dhp-workflows/dhp-blacklist/pom.xml +++ b/dhp-workflows/dhp-blacklist/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.1 + 1.2.2-SNAPSHOT 4.0.0 diff --git a/dhp-workflows/dhp-broker-events/pom.xml b/dhp-workflows/dhp-broker-events/pom.xml index c851fbd2a7..fa1a14b4a0 100644 --- a/dhp-workflows/dhp-broker-events/pom.xml +++ b/dhp-workflows/dhp-broker-events/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.1 + 1.2.2-SNAPSHOT 4.0.0 diff --git a/dhp-workflows/dhp-dedup-openaire/pom.xml b/dhp-workflows/dhp-dedup-openaire/pom.xml index 05a373507b..44cf9e67cc 100644 --- a/dhp-workflows/dhp-dedup-openaire/pom.xml +++ b/dhp-workflows/dhp-dedup-openaire/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.1 + 1.2.2-SNAPSHOT 4.0.0 dhp-dedup-openaire diff --git a/dhp-workflows/dhp-dedup-scholexplorer/pom.xml b/dhp-workflows/dhp-dedup-scholexplorer/pom.xml index 21069f42d2..429c8a6486 100644 --- a/dhp-workflows/dhp-dedup-scholexplorer/pom.xml +++ b/dhp-workflows/dhp-dedup-scholexplorer/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.1 + 1.2.2-SNAPSHOT 4.0.0 diff --git a/dhp-workflows/dhp-distcp/pom.xml b/dhp-workflows/dhp-distcp/pom.xml index 7f15601aec..8454c29a4d 100644 --- a/dhp-workflows/dhp-distcp/pom.xml +++ b/dhp-workflows/dhp-distcp/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.1 + 1.2.2-SNAPSHOT 4.0.0 diff --git a/dhp-workflows/dhp-enrichment/pom.xml b/dhp-workflows/dhp-enrichment/pom.xml index ecf00a7bc4..2dc0f24366 100644 --- a/dhp-workflows/dhp-enrichment/pom.xml +++ b/dhp-workflows/dhp-enrichment/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.1 + 1.2.2-SNAPSHOT 4.0.0 diff --git a/dhp-workflows/dhp-graph-mapper/pom.xml b/dhp-workflows/dhp-graph-mapper/pom.xml index 066aea97ac..aee3d27c1d 100644 --- a/dhp-workflows/dhp-graph-mapper/pom.xml +++ b/dhp-workflows/dhp-graph-mapper/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.1 + 1.2.2-SNAPSHOT 4.0.0 diff --git a/dhp-workflows/dhp-graph-provision-scholexplorer/pom.xml b/dhp-workflows/dhp-graph-provision-scholexplorer/pom.xml index 533093598d..e0ce739cfb 100644 --- a/dhp-workflows/dhp-graph-provision-scholexplorer/pom.xml +++ b/dhp-workflows/dhp-graph-provision-scholexplorer/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.1 + 1.2.2-SNAPSHOT 4.0.0 diff --git a/dhp-workflows/dhp-graph-provision/pom.xml b/dhp-workflows/dhp-graph-provision/pom.xml index a78eab8ec9..62bf7186ce 100644 --- a/dhp-workflows/dhp-graph-provision/pom.xml +++ b/dhp-workflows/dhp-graph-provision/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.1 + 1.2.2-SNAPSHOT 4.0.0 diff --git a/dhp-workflows/dhp-stats-update/pom.xml b/dhp-workflows/dhp-stats-update/pom.xml index a15f3efe15..d6ec4e6ab2 100644 --- a/dhp-workflows/dhp-stats-update/pom.xml +++ b/dhp-workflows/dhp-stats-update/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.1 + 1.2.2-SNAPSHOT 4.0.0 dhp-stats-update diff --git a/dhp-workflows/dhp-worfklow-profiles/pom.xml b/dhp-workflows/dhp-worfklow-profiles/pom.xml index b1c5da730d..cb20db57eb 100644 --- a/dhp-workflows/dhp-worfklow-profiles/pom.xml +++ b/dhp-workflows/dhp-worfklow-profiles/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.1 + 1.2.2-SNAPSHOT 4.0.0 diff --git a/dhp-workflows/pom.xml b/dhp-workflows/pom.xml index fe7adf79b8..cf9753da40 100644 --- a/dhp-workflows/pom.xml +++ b/dhp-workflows/pom.xml @@ -6,7 +6,7 @@ eu.dnetlib.dhp dhp - 1.2.1 + 1.2.2-SNAPSHOT ../ diff --git a/pom.xml b/pom.xml index 77675b576d..e0ee189001 100644 --- a/pom.xml +++ b/pom.xml @@ -3,7 +3,7 @@ 4.0.0 eu.dnetlib.dhp dhp - 1.2.1 + 1.2.2-SNAPSHOT pom @@ -38,7 +38,7 @@ scm:git:gitea@code-repo.d4science.org:D-Net/dnet-hadoop.git scm:git:gitea@code-repo.d4science.org:D-Net/dnet-hadoop.git https://code-repo.d4science.org/D-Net/dnet-hadoop/ - dhp-1.2.1 + HEAD This module is the root descriptor for the dnet-hadoop project From b1546605e3a9a566167af7d7e795b320a4d21ea2 Mon Sep 17 00:00:00 2001 From: "michele.artini" Date: Tue, 26 May 2020 08:44:15 +0200 Subject: [PATCH 33/37] updated version of a dependency --- dhp-workflows/dhp-broker-events/pom.xml | 2 +- .../relatedPublications/AbstractEnrichMissingPublication.java | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/dhp-workflows/dhp-broker-events/pom.xml b/dhp-workflows/dhp-broker-events/pom.xml index 8b7ec38511..51ea4247b2 100644 --- a/dhp-workflows/dhp-broker-events/pom.xml +++ b/dhp-workflows/dhp-broker-events/pom.xml @@ -57,7 +57,7 @@ eu.dnetlib dnet-openaire-broker-common - [1.0.0,2.0.0) + [2.0.0,3.0.0) diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/AbstractEnrichMissingPublication.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/AbstractEnrichMissingPublication.java index 405b06ca6a..75e77b3c6f 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/AbstractEnrichMissingPublication.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/AbstractEnrichMissingPublication.java @@ -53,8 +53,7 @@ public abstract class AbstractEnrichMissingPublication return new UpdateInfo<>( getTopic(), highlightValue, source.getLeft(), target.getLeft(), - (p, rel) -> { - }, // p.getPublications().add(rel), //TODO available in the future release of dnet-openaire-broker-common + (p, rel) -> p.getPublications().add(rel), rel -> rel.getInstances().get(0).getUrl()); } From eea07f4c4208ad1eef36efea397308dc32f7bd54 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 26 May 2020 09:21:49 +0200 Subject: [PATCH 34/37] refactoring --- .../dnetlib/dhp/blacklist/BlackListTest.java | 1 - .../SparkOrcidToResultFromSemRelJob.java | 106 +++++++++--------- 2 files changed, 52 insertions(+), 55 deletions(-) diff --git a/dhp-workflows/dhp-blacklist/src/test/java/eu/dnetlib/dhp/blacklist/BlackListTest.java b/dhp-workflows/dhp-blacklist/src/test/java/eu/dnetlib/dhp/blacklist/BlackListTest.java index 0487a5844d..585848589c 100644 --- a/dhp-workflows/dhp-blacklist/src/test/java/eu/dnetlib/dhp/blacklist/BlackListTest.java +++ b/dhp-workflows/dhp-blacklist/src/test/java/eu/dnetlib/dhp/blacklist/BlackListTest.java @@ -61,7 +61,6 @@ public class BlackListTest { spark.stop(); } - @Test public void noRemoveTest() throws Exception { SparkRemoveBlacklistedRelationJob diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java index 34882b5872..3fc1270645 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java @@ -7,7 +7,6 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; import java.util.List; import java.util.Optional; -import eu.dnetlib.dhp.common.PacePerson; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; import org.apache.spark.SparkConf; @@ -23,6 +22,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.Lists; import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.common.PacePerson; import eu.dnetlib.dhp.schema.oaf.Author; import eu.dnetlib.dhp.schema.oaf.Result; import eu.dnetlib.dhp.schema.oaf.StructuredProperty; @@ -124,78 +124,76 @@ public class SparkOrcidToResultFromSemRelJob { private static void enrichAuthor(Author a, List au) { PacePerson pp = new PacePerson(a.getFullname(), false); for (AutoritativeAuthor aa : au) { - if (enrichAuthor(aa, a, pp.getNormalisedFirstName(), pp.getNormalisedSurname() )) { + if (enrichAuthor(aa, a, pp.getNormalisedFirstName(), pp.getNormalisedSurname())) { return; } } } private static boolean enrichAuthor(AutoritativeAuthor autoritative_author, Author author, - String author_name, - String author_surname) { - boolean toaddpid = false; + String author_name, + String author_surname) { + boolean toaddpid = false; + if (StringUtils.isNotEmpty(autoritative_author.getSurname())) { + if (StringUtils.isNotEmpty(author.getSurname())) { + author_surname = author.getSurname(); + } + if (StringUtils.isNotEmpty(author_surname)) { + if (autoritative_author + .getSurname() + .trim() + .equalsIgnoreCase(author_surname.trim())) { - if (StringUtils.isNotEmpty(autoritative_author.getSurname())) { - if (StringUtils.isNotEmpty(author.getSurname())){ - author_surname = author.getSurname(); - } - if (StringUtils.isNotEmpty(author_surname)) { - if (autoritative_author - .getSurname() - .trim() - .equalsIgnoreCase(author_surname.trim())) { - - // have the same surname. Check the name - if (StringUtils.isNotEmpty(autoritative_author.getName())) { - if(StringUtils.isNotEmpty(author.getName())){ - author_name = author.getName(); + // have the same surname. Check the name + if (StringUtils.isNotEmpty(autoritative_author.getName())) { + if (StringUtils.isNotEmpty(author.getName())) { + author_name = author.getName(); + } + if (StringUtils.isNotEmpty(author_name)) { + if (autoritative_author + .getName() + .trim() + .equalsIgnoreCase(author_name.trim())) { + toaddpid = true; } - if (StringUtils.isNotEmpty(author_name)) { + // they could be differently written (i.e. only the initials of the name + // in one of the two + else { if (autoritative_author - .getName() - .trim() - .equalsIgnoreCase(author_name.trim())) { + .getName() + .trim() + .substring(0, 0) + .equalsIgnoreCase(author_name.trim().substring(0, 0))) { toaddpid = true; } - // they could be differently written (i.e. only the initials of the name - // in one of the two - else { - if (autoritative_author - .getName() - .trim() - .substring(0, 0) - .equalsIgnoreCase(author_name.trim().substring(0, 0))) { - toaddpid = true; - } - } } } } } } - if (toaddpid) { - StructuredProperty p = new StructuredProperty(); - p.setValue(autoritative_author.getOrcid()); - p.setQualifier(getQualifier(PROPAGATION_AUTHOR_PID, PROPAGATION_AUTHOR_PID)); - p - .setDataInfo( - getDataInfo( - PROPAGATION_DATA_INFO_TYPE, - PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_ID, - PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_NAME)); - - Optional> authorPid = Optional.ofNullable(author.getPid()); - if (authorPid.isPresent()) { - authorPid.get().add(p); - } else { - author.setPid(Lists.newArrayList(p)); - } - - } - return toaddpid; } + if (toaddpid) { + StructuredProperty p = new StructuredProperty(); + p.setValue(autoritative_author.getOrcid()); + p.setQualifier(getQualifier(PROPAGATION_AUTHOR_PID, PROPAGATION_AUTHOR_PID)); + p + .setDataInfo( + getDataInfo( + PROPAGATION_DATA_INFO_TYPE, + PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_ID, + PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_NAME)); + Optional> authorPid = Optional.ofNullable(author.getPid()); + if (authorPid.isPresent()) { + authorPid.get().add(p); + } else { + author.setPid(Lists.newArrayList(p)); + } + + } + return toaddpid; + } private static boolean containsAllowedPid(Author a) { Optional> pids = Optional.ofNullable(a.getPid()); From 7b288a94cba4aaaff32e256663ff71e37c28f651 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Tue, 26 May 2020 09:54:13 +0200 Subject: [PATCH 35/37] code formatting --- .../main/java/eu/dnetlib/dhp/schema/oaf/Result.java | 13 +++++++------ .../eu/dnetlib/dhp/oa/dedup/EntityMergerTest.java | 4 ++-- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Result.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Result.java index 213a585a8c..11fdaa4f92 100644 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Result.java +++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Result.java @@ -244,20 +244,20 @@ public class Result extends OafEntity implements Serializable { subject = mergeLists(subject, r.getSubject()); - //merge title lists: main title with higher trust and distinct between the others + // merge title lists: main title with higher trust and distinct between the others StructuredProperty baseMainTitle = null; - if(title != null) { + if (title != null) { baseMainTitle = getMainTitle(title); title.remove(baseMainTitle); } StructuredProperty newMainTitle = null; - if(r.getTitle() != null) { + if (r.getTitle() != null) { newMainTitle = getMainTitle(r.getTitle()); r.getTitle().remove(newMainTitle); } - if (newMainTitle != null && compareTrust(this, r) < 0 ) + if (newMainTitle != null && compareTrust(this, r) < 0) baseMainTitle = newMainTitle; title = mergeLists(title, r.getTitle()); @@ -314,8 +314,9 @@ public class Result extends OafEntity implements Serializable { } private StructuredProperty getMainTitle(List titles) { - //need to check if the list of titles contains more than 1 main title? (in that case, we should chose which main title select in the list) - for (StructuredProperty title: titles) { + // need to check if the list of titles contains more than 1 main title? (in that case, we should chose which + // main title select in the list) + for (StructuredProperty title : titles) { if (title.getQualifier() != null && title.getQualifier().getClassid() != null) if (title.getQualifier().getClassid().equals("main title")) return title; diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/EntityMergerTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/EntityMergerTest.java index f4b2c22522..b8ccb038d7 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/EntityMergerTest.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/EntityMergerTest.java @@ -91,9 +91,9 @@ public class EntityMergerTest implements Serializable { assertEquals(pub_merged.getAuthor().size(), 9); assertEquals(AuthorMerger.countAuthorsPids(pub_merged.getAuthor()), 4); - //verify title + // verify title int count = 0; - for (StructuredProperty title: pub_merged.getTitle()){ + for (StructuredProperty title : pub_merged.getTitle()) { if (title.getQualifier().getClassid().equals("main title")) count++; } From 55595d723599a3d0b30fbec81742829ad801e9b0 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Tue, 26 May 2020 10:28:35 +0200 Subject: [PATCH 36/37] HACK: patch NULL values with defaults found in result.datainfo.deletedbyinference and result.context --- .../eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java index 1c65e8adec..4800def0ae 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java @@ -4,6 +4,7 @@ package eu.dnetlib.dhp.bulktag; import static eu.dnetlib.dhp.PropagationConstant.removeOutputDir; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; +import java.util.ArrayList; import java.util.Optional; import org.apache.commons.io.IOUtils; @@ -100,6 +101,7 @@ public class SparkBulkTagJob { ResultTagger resultTagger = new ResultTagger(); readPath(spark, inputPath, resultClazz) + .map(patchResult(), Encoders.bean(resultClazz)) .map( (MapFunction) value -> resultTagger .enrichContextCriteria( @@ -119,4 +121,17 @@ public class SparkBulkTagJob { .map((MapFunction) value -> OBJECT_MAPPER.readValue(value, clazz), Encoders.bean(clazz)); } + // TODO remove this hack as soon as the values fixed by this method will be provided as NON null + private static MapFunction patchResult() { + return (MapFunction) r -> { + if (r.getDataInfo().getDeletedbyinference() == null) { + r.getDataInfo().setDeletedbyinference(false); + } + if (r.getContext() == null) { + r.setContext(new ArrayList<>()); + } + return r; + }; + } + } From b8e541a454eb0745f42bd9e0ec869479633cd047 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Tue, 26 May 2020 10:30:09 +0200 Subject: [PATCH 37/37] fixing repeated organization.websiteurl in organization entities (#5645) as well as project.ecinternationalorganizationeurinterests --- .../eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java index 6f042b45cf..f99298130b 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java @@ -769,7 +769,7 @@ public class XmlRecordFactory implements Serializable { XmlSerializationUtils.asXmlElement("websiteurl", o.getWebsiteurl().getValue())); } if (o.getLogourl() != null) { - metadata.add(XmlSerializationUtils.asXmlElement("websiteurl", o.getLogourl().getValue())); + metadata.add(XmlSerializationUtils.asXmlElement("logourl", o.getLogourl().getValue())); } if (o.getEclegalbody() != null) { @@ -801,13 +801,13 @@ public class XmlRecordFactory implements Serializable { .asXmlElement( "echighereducation", o.getEchighereducation().getValue())); } - if (o.getEcinternationalorganization() != null) { + if (o.getEcinternationalorganizationeurinterests() != null) { metadata .add( XmlSerializationUtils .asXmlElement( "ecinternationalorganizationeurinterests", - o.getEcinternationalorganization().getValue())); + o.getEcinternationalorganizationeurinterests().getValue())); } if (o.getEcinternationalorganization() != null) { metadata