diff --git a/dhp-build/dhp-build-assembly-resources/pom.xml b/dhp-build/dhp-build-assembly-resources/pom.xml index 327c33d6f..8bae191d3 100644 --- a/dhp-build/dhp-build-assembly-resources/pom.xml +++ b/dhp-build/dhp-build-assembly-resources/pom.xml @@ -6,7 +6,7 @@ eu.dnetlib.dhp dhp-build - 1.2.1-SNAPSHOT + 1.2.2-SNAPSHOT dhp-build-assembly-resources diff --git a/dhp-build/dhp-build-properties-maven-plugin/pom.xml b/dhp-build/dhp-build-properties-maven-plugin/pom.xml index 873046e08..ad8cd57b4 100644 --- a/dhp-build/dhp-build-properties-maven-plugin/pom.xml +++ b/dhp-build/dhp-build-properties-maven-plugin/pom.xml @@ -6,7 +6,7 @@ eu.dnetlib.dhp dhp-build - 1.2.1-SNAPSHOT + 1.2.2-SNAPSHOT dhp-build-properties-maven-plugin diff --git a/dhp-build/dhp-code-style/pom.xml b/dhp-build/dhp-code-style/pom.xml index 8099a72e4..08f5de9ee 100644 --- a/dhp-build/dhp-code-style/pom.xml +++ b/dhp-build/dhp-code-style/pom.xml @@ -5,7 +5,7 @@ eu.dnetlib.dhp dhp-code-style - 1.2.1-SNAPSHOT + 1.2.2-SNAPSHOT jar diff --git a/dhp-build/pom.xml b/dhp-build/pom.xml index a700a2918..369e25b24 100644 --- a/dhp-build/pom.xml +++ b/dhp-build/pom.xml @@ -4,7 +4,7 @@ eu.dnetlib.dhp dhp - 1.2.1-SNAPSHOT + 1.2.2-SNAPSHOT dhp-build pom diff --git a/dhp-common/pom.xml b/dhp-common/pom.xml index c7cb11b08..60e66f45a 100644 --- a/dhp-common/pom.xml +++ b/dhp-common/pom.xml @@ -5,7 +5,7 @@ eu.dnetlib.dhp dhp - 1.2.1-SNAPSHOT + 1.2.2-SNAPSHOT ../ diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/PacePerson.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/PacePerson.java similarity index 99% rename from dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/PacePerson.java rename to dhp-common/src/main/java/eu/dnetlib/dhp/common/PacePerson.java index 6e474f2f3..1909ddcca 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/PacePerson.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/PacePerson.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.oa.graph.raw.common; +package eu.dnetlib.dhp.common; import java.nio.charset.StandardCharsets; import java.text.Normalizer; diff --git a/dhp-schemas/pom.xml b/dhp-schemas/pom.xml index fe5d0c431..5e864cf94 100644 --- a/dhp-schemas/pom.xml +++ b/dhp-schemas/pom.xml @@ -5,7 +5,7 @@ eu.dnetlib.dhp dhp - 1.2.1-SNAPSHOT + 1.2.2-SNAPSHOT ../ diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/LicenseComparator.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/LicenseComparator.java new file mode 100644 index 000000000..db523ad1a --- /dev/null +++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/LicenseComparator.java @@ -0,0 +1,69 @@ + +package eu.dnetlib.dhp.schema.common; + +import java.util.Comparator; + +import eu.dnetlib.dhp.schema.oaf.Qualifier; + +public class LicenseComparator implements Comparator { + + @Override + public int compare(Qualifier left, Qualifier right) { + + if (left == null && right == null) + return 0; + if (left == null) + return 1; + if (right == null) + return -1; + + String lClass = left.getClassid(); + String rClass = right.getClassid(); + + if (lClass.equals(rClass)) + return 0; + + if (lClass.equals("OPEN SOURCE")) + return -1; + if (rClass.equals("OPEN SOURCE")) + return 1; + + if (lClass.equals("OPEN")) + return -1; + if (rClass.equals("OPEN")) + return 1; + + if (lClass.equals("6MONTHS")) + return -1; + if (rClass.equals("6MONTHS")) + return 1; + + if (lClass.equals("12MONTHS")) + return -1; + if (rClass.equals("12MONTHS")) + return 1; + + if (lClass.equals("EMBARGO")) + return -1; + if (rClass.equals("EMBARGO")) + return 1; + + if (lClass.equals("RESTRICTED")) + return -1; + if (rClass.equals("RESTRICTED")) + return 1; + + if (lClass.equals("CLOSED")) + return -1; + if (rClass.equals("CLOSED")) + return 1; + + if (lClass.equals("UNKNOWN")) + return -1; + if (rClass.equals("UNKNOWN")) + return 1; + + // Else (but unlikely), lexicographical ordering will do. + return lClass.compareTo(rClass); + } +} diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/DataInfo.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/DataInfo.java index cc77e1ea0..9d572ee30 100644 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/DataInfo.java +++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/DataInfo.java @@ -8,7 +8,7 @@ public class DataInfo implements Serializable { private Boolean invisible = false; private Boolean inferred; - private Boolean deletedbyinference; + private Boolean deletedbyinference = false; private String trust; private String inferenceprovenance; private Qualifier provenanceaction; diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Field.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Field.java index 1a85c6842..8358bc4b3 100644 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Field.java +++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Field.java @@ -2,6 +2,7 @@ package eu.dnetlib.dhp.schema.oaf; import java.io.Serializable; +import java.util.Objects; public class Field implements Serializable { @@ -39,6 +40,6 @@ public class Field implements Serializable { if (getClass() != obj.getClass()) return false; Field other = (Field) obj; - return getValue().equals(other.getValue()); + return Objects.equals(getValue(), other.getValue()); } } diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/OafEntity.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/OafEntity.java index 09742748d..2823ee49d 100644 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/OafEntity.java +++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/OafEntity.java @@ -106,6 +106,7 @@ public abstract class OafEntity extends Oaf implements Serializable { .stream(lists) .filter(Objects::nonNull) .flatMap(List::stream) + .filter(Objects::nonNull) .distinct() .collect(Collectors.toList()); } diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Result.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Result.java index 11316f36e..9d723f998 100644 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Result.java +++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Result.java @@ -256,7 +256,25 @@ public class Result extends OafEntity implements Serializable { subject = mergeLists(subject, r.getSubject()); + // merge title lists: main title with higher trust and distinct between the others + StructuredProperty baseMainTitle = null; + if (title != null) { + baseMainTitle = getMainTitle(title); + title.remove(baseMainTitle); + } + + StructuredProperty newMainTitle = null; + if (r.getTitle() != null) { + newMainTitle = getMainTitle(r.getTitle()); + r.getTitle().remove(newMainTitle); + } + + if (newMainTitle != null && compareTrust(this, r) < 0) + baseMainTitle = newMainTitle; + title = mergeLists(title, r.getTitle()); + if (title != null && baseMainTitle != null) + title.add(baseMainTitle); relevantdate = mergeLists(relevantdate, r.getRelevantdate()); @@ -306,4 +324,15 @@ public class Result extends OafEntity implements Serializable { } return a.size() > b.size() ? a : b; } + + private StructuredProperty getMainTitle(List titles) { + // need to check if the list of titles contains more than 1 main title? (in that case, we should chose which + // main title select in the list) + for (StructuredProperty title : titles) { + if (title.getQualifier() != null && title.getQualifier().getClassid() != null) + if (title.getQualifier().getClassid().equals("main title")) + return title; + } + return null; + } } diff --git a/dhp-workflows/dhp-actionmanager/pom.xml b/dhp-workflows/dhp-actionmanager/pom.xml index 22a81f7da..ec6247102 100644 --- a/dhp-workflows/dhp-actionmanager/pom.xml +++ b/dhp-workflows/dhp-actionmanager/pom.xml @@ -4,7 +4,7 @@ eu.dnetlib.dhp dhp-workflows - 1.2.1-SNAPSHOT + 1.2.2-SNAPSHOT dhp-actionmanager diff --git a/dhp-workflows/dhp-aggregation/pom.xml b/dhp-workflows/dhp-aggregation/pom.xml index 361517af9..282ca476d 100644 --- a/dhp-workflows/dhp-aggregation/pom.xml +++ b/dhp-workflows/dhp-aggregation/pom.xml @@ -4,7 +4,7 @@ eu.dnetlib.dhp dhp-workflows - 1.2.1-SNAPSHOT + 1.2.2-SNAPSHOT dhp-aggregation diff --git a/dhp-workflows/dhp-blacklist/pom.xml b/dhp-workflows/dhp-blacklist/pom.xml index 37abc22f6..a3cc15b74 100644 --- a/dhp-workflows/dhp-blacklist/pom.xml +++ b/dhp-workflows/dhp-blacklist/pom.xml @@ -1,11 +1,9 @@ - + dhp-workflows eu.dnetlib.dhp - 1.2.1-SNAPSHOT + 1.2.2-SNAPSHOT 4.0.0 diff --git a/dhp-workflows/dhp-blacklist/src/test/java/eu/dnetlib/dhp/blacklist/BlackListTest.java b/dhp-workflows/dhp-blacklist/src/test/java/eu/dnetlib/dhp/blacklist/BlackListTest.java index 0487a5844..585848589 100644 --- a/dhp-workflows/dhp-blacklist/src/test/java/eu/dnetlib/dhp/blacklist/BlackListTest.java +++ b/dhp-workflows/dhp-blacklist/src/test/java/eu/dnetlib/dhp/blacklist/BlackListTest.java @@ -61,7 +61,6 @@ public class BlackListTest { spark.stop(); } - @Test public void noRemoveTest() throws Exception { SparkRemoveBlacklistedRelationJob diff --git a/dhp-workflows/dhp-broker-events/pom.xml b/dhp-workflows/dhp-broker-events/pom.xml index 8b7ec3851..0e84d99a4 100644 --- a/dhp-workflows/dhp-broker-events/pom.xml +++ b/dhp-workflows/dhp-broker-events/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.1-SNAPSHOT + 1.2.2-SNAPSHOT 4.0.0 @@ -57,7 +57,7 @@ eu.dnetlib dnet-openaire-broker-common - [1.0.0,2.0.0) + [2.0.0,3.0.0) diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/model/Topic.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/model/Topic.java index 98088dd0a..0716bd98d 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/model/Topic.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/model/Topic.java @@ -4,48 +4,45 @@ package eu.dnetlib.dhp.broker.model; public enum Topic { // ENRICHMENT MISSING - ENRICH_MISSING_OA_VERSION("ENRICH/MISSING/OPENACCESS_VERSION"), - ENRICH_MISSING_ABSTRACT("ENRICH/MISSING/ABSTRACT"), - ENRICH_MISSING_PUBLICATION_DATE("ENRICH/MISSING/PUBLICATION_DATE"), - ENRICH_MISSING_PID("ENRICH/MISSING/PID"), - ENRICH_MISSING_PROJECT("ENRICH/MISSING/PROJECT"), - ENRICH_MISSING_SOFTWARE("ENRICH/MISSING/SOFTWARE"), - ENRICH_MISSING_SUBJECT_MESHEUROPMC("ENRICH/MISSING/SUBJECT/MESHEUROPMC"), - ENRICH_MISSING_SUBJECT_ARXIV("ENRICH/MISSING/SUBJECT/ARXIV"), - ENRICH_MISSING_SUBJECT_JEL("ENRICH/MISSING/SUBJECT/JEL"), - ENRICH_MISSING_SUBJECT_DDC("ENRICH/MISSING/SUBJECT/DDC"), - ENRICH_MISSING_SUBJECT_ACM("ENRICH/MISSING/SUBJECT/ACM"), - ENRICH_MISSING_SUBJECT_RVK("ENRICH/MISSING/SUBJECT/RVK"), - ENRICH_MISSING_AUTHOR_ORCID("ENRICH/MISSING/AUTHOR/ORCID"), + ENRICH_MISSING_OA_VERSION("ENRICH/MISSING/OPENACCESS_VERSION"), ENRICH_MISSING_ABSTRACT( + "ENRICH/MISSING/ABSTRACT"), ENRICH_MISSING_PUBLICATION_DATE( + "ENRICH/MISSING/PUBLICATION_DATE"), ENRICH_MISSING_PID( + "ENRICH/MISSING/PID"), ENRICH_MISSING_PROJECT("ENRICH/MISSING/PROJECT"), ENRICH_MISSING_SOFTWARE( + "ENRICH/MISSING/SOFTWARE"), ENRICH_MISSING_SUBJECT_MESHEUROPMC( + "ENRICH/MISSING/SUBJECT/MESHEUROPMC"), ENRICH_MISSING_SUBJECT_ARXIV( + "ENRICH/MISSING/SUBJECT/ARXIV"), ENRICH_MISSING_SUBJECT_JEL( + "ENRICH/MISSING/SUBJECT/JEL"), ENRICH_MISSING_SUBJECT_DDC( + "ENRICH/MISSING/SUBJECT/DDC"), ENRICH_MISSING_SUBJECT_ACM( + "ENRICH/MISSING/SUBJECT/ACM"), ENRICH_MISSING_SUBJECT_RVK( + "ENRICH/MISSING/SUBJECT/RVK"), ENRICH_MISSING_AUTHOR_ORCID( + "ENRICH/MISSING/AUTHOR/ORCID"), // ENRICHMENT MORE - ENRICH_MORE_PID("ENRICH/MORE/PID"), - ENRICH_MORE_OA_VERSION("ENRICH/MORE/OPENACCESS_VERSION"), - ENRICH_MORE_ABSTRACT("ENRICH/MORE/ABSTRACT"), - ENRICH_MORE_PUBLICATION_DATE("ENRICH/MORE/PUBLICATION_DATE"), - ENRICH_MORE_PROJECT("ENRICH/MORE/PROJECT"), - ENRICH_MORE_SUBJECT_MESHEUROPMC("ENRICH/MORE/SUBJECT/MESHEUROPMC"), - ENRICH_MORE_SUBJECT_ARXIV("ENRICH/MORE/SUBJECT/ARXIV"), - ENRICH_MORE_SUBJECT_JEL("ENRICH/MORE/SUBJECT/JEL"), - ENRICH_MORE_SUBJECT_DDC("ENRICH/MORE/SUBJECT/DDC"), - ENRICH_MORE_SUBJECT_ACM("ENRICH/MORE/SUBJECT/ACM"), - ENRICH_MORE_SUBJECT_RVK("ENRICH/MORE/SUBJECT/RVK"), + ENRICH_MORE_PID("ENRICH/MORE/PID"), ENRICH_MORE_OA_VERSION("ENRICH/MORE/OPENACCESS_VERSION"), ENRICH_MORE_ABSTRACT( + "ENRICH/MORE/ABSTRACT"), ENRICH_MORE_PUBLICATION_DATE("ENRICH/MORE/PUBLICATION_DATE"), ENRICH_MORE_PROJECT( + "ENRICH/MORE/PROJECT"), ENRICH_MORE_SOFTWARE("ENRICH/MORE/SOFTWARE"), ENRICH_MORE_SUBJECT_MESHEUROPMC( + "ENRICH/MORE/SUBJECT/MESHEUROPMC"), ENRICH_MORE_SUBJECT_ARXIV( + "ENRICH/MORE/SUBJECT/ARXIV"), ENRICH_MORE_SUBJECT_JEL( + "ENRICH/MORE/SUBJECT/JEL"), ENRICH_MORE_SUBJECT_DDC( + "ENRICH/MORE/SUBJECT/DDC"), ENRICH_MORE_SUBJECT_ACM( + "ENRICH/MORE/SUBJECT/ACM"), ENRICH_MORE_SUBJECT_RVK("ENRICH/MORE/SUBJECT/RVK"), // ADDITION ADD_BY_PROJECT("ADD/BY_PROJECT"), // OTHER RELS - ENRICH_MISSING_PUBLICATION_IS_RELATED_TO("ENRICH/MISSING/PUBLICATION/IS_RELATED_TO"), - ENRICH_MISSING_PUBLICATION_REFERENCES("ENRICH/MISSING/PUBLICATION/REFERENCES"), - ENRICH_MISSING_PUBLICATION_IS_REFERENCED_BY("ENRICH/MISSING/PUBLICATION/IS_REFERENCED_BY"), - ENRICH_MISSING_PUBLICATION_IS_SUPPLEMENTED_TO("ENRICH/MISSING/PUBLICATION/IS_SUPPLEMENTED_TO"), - ENRICH_MISSING_PUBLICATION_IS_SUPPLEMENTED_BY("ENRICH/MISSING/PUBLICATION/IS_SUPPLEMENTED_BY"), + ENRICH_MISSING_PUBLICATION_IS_RELATED_TO( + "ENRICH/MISSING/PUBLICATION/IS_RELATED_TO"), ENRICH_MISSING_PUBLICATION_REFERENCES( + "ENRICH/MISSING/PUBLICATION/REFERENCES"), ENRICH_MISSING_PUBLICATION_IS_REFERENCED_BY( + "ENRICH/MISSING/PUBLICATION/IS_REFERENCED_BY"), ENRICH_MISSING_PUBLICATION_IS_SUPPLEMENTED_TO( + "ENRICH/MISSING/PUBLICATION/IS_SUPPLEMENTED_TO"), ENRICH_MISSING_PUBLICATION_IS_SUPPLEMENTED_BY( + "ENRICH/MISSING/PUBLICATION/IS_SUPPLEMENTED_BY"), - ENRICH_MISSING_DATASET_IS_RELATED_TO("ENRICH/MISSING/DATASET/IS_RELATED_TO"), - ENRICH_MISSING_DATASET_REFERENCES("ENRICH/MISSING/DATASET/REFERENCES"), - ENRICH_MISSING_DATASET_IS_REFERENCED_BY("ENRICH/MISSING/DATASET/IS_REFERENCED_BY"), - ENRICH_MISSING_DATASET_IS_SUPPLEMENTED_TO("ENRICH/MISSING/DATASET/IS_SUPPLEMENTED_TO"), - ENRICH_MISSING_DATASET_IS_SUPPLEMENTED_BY("ENRICH/MISSING/DATASET/IS_SUPPLEMENTED_BY"); + ENRICH_MISSING_DATASET_IS_RELATED_TO("ENRICH/MISSING/DATASET/IS_RELATED_TO"), ENRICH_MISSING_DATASET_REFERENCES( + "ENRICH/MISSING/DATASET/REFERENCES"), ENRICH_MISSING_DATASET_IS_REFERENCED_BY( + "ENRICH/MISSING/DATASET/IS_REFERENCED_BY"), ENRICH_MISSING_DATASET_IS_SUPPLEMENTED_TO( + "ENRICH/MISSING/DATASET/IS_SUPPLEMENTED_TO"), ENRICH_MISSING_DATASET_IS_SUPPLEMENTED_BY( + "ENRICH/MISSING/DATASET/IS_SUPPLEMENTED_BY"),; Topic(final String path) { this.path = path; @@ -59,7 +56,9 @@ public enum Topic { public static Topic fromPath(final String path) { for (final Topic t : Topic.values()) { - if (t.getPath().equals(path)) { return t; } + if (t.getPath().equals(path)) { + return t; + } } return null; } diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/GenerateEventsApplication.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/GenerateEventsApplication.java index 5fdd10925..d5e577972 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/GenerateEventsApplication.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/GenerateEventsApplication.java @@ -4,11 +4,14 @@ package eu.dnetlib.dhp.broker.oa; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.util.ArrayList; +import java.util.Collection; import java.util.List; +import java.util.Map; import java.util.Optional; import java.util.stream.Collectors; import org.apache.commons.io.IOUtils; +import org.apache.commons.lang3.tuple.Pair; import org.apache.hadoop.io.compress.GzipCodec; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; @@ -27,20 +30,35 @@ import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.broker.model.Event; import eu.dnetlib.dhp.broker.model.EventFactory; -import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingAbstract; -import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingAuthorOrcid; -import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingOpenAccess; -import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingPid; -import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingProject; -import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingPublicationDate; -import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingSubject; -import eu.dnetlib.dhp.broker.oa.matchers.EnrichMoreOpenAccess; -import eu.dnetlib.dhp.broker.oa.matchers.EnrichMorePid; -import eu.dnetlib.dhp.broker.oa.matchers.EnrichMoreSubject; import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher; +import eu.dnetlib.dhp.broker.oa.matchers.relatedDatasets.EnrichMissingDatasetIsReferencedBy; +import eu.dnetlib.dhp.broker.oa.matchers.relatedDatasets.EnrichMissingDatasetIsRelatedTo; +import eu.dnetlib.dhp.broker.oa.matchers.relatedDatasets.EnrichMissingDatasetIsSupplementedBy; +import eu.dnetlib.dhp.broker.oa.matchers.relatedDatasets.EnrichMissingDatasetIsSupplementedTo; +import eu.dnetlib.dhp.broker.oa.matchers.relatedDatasets.EnrichMissingDatasetReferences; +import eu.dnetlib.dhp.broker.oa.matchers.relatedProjects.EnrichMissingProject; +import eu.dnetlib.dhp.broker.oa.matchers.relatedProjects.EnrichMoreProject; +import eu.dnetlib.dhp.broker.oa.matchers.relatedPublications.EnrichMissingPublicationIsReferencedBy; +import eu.dnetlib.dhp.broker.oa.matchers.relatedPublications.EnrichMissingPublicationIsRelatedTo; +import eu.dnetlib.dhp.broker.oa.matchers.relatedPublications.EnrichMissingPublicationIsSupplementedBy; +import eu.dnetlib.dhp.broker.oa.matchers.relatedPublications.EnrichMissingPublicationIsSupplementedTo; +import eu.dnetlib.dhp.broker.oa.matchers.relatedPublications.EnrichMissingPublicationReferences; +import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMissingAbstract; +import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMissingAuthorOrcid; +import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMissingOpenAccess; +import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMissingPid; +import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMissingPublicationDate; +import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMissingSoftware; +import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMissingSubject; +import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMoreOpenAccess; +import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMorePid; +import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMoreSoftware; +import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMoreSubject; +import eu.dnetlib.dhp.broker.oa.util.BrokerConstants; import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; import eu.dnetlib.dhp.common.HdfsSupport; import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct; +import eu.dnetlib.dhp.schema.oaf.Project; import eu.dnetlib.dhp.schema.oaf.Publication; import eu.dnetlib.dhp.schema.oaf.Relation; import eu.dnetlib.dhp.schema.oaf.Result; @@ -50,24 +68,44 @@ public class GenerateEventsApplication { private static final Logger log = LoggerFactory.getLogger(GenerateEventsApplication.class); - private static final UpdateMatcher enrichMissingAbstract = new EnrichMissingAbstract(); - private static final UpdateMatcher enrichMissingAuthorOrcid = new EnrichMissingAuthorOrcid(); - private static final UpdateMatcher enrichMissingOpenAccess = new EnrichMissingOpenAccess(); - private static final UpdateMatcher enrichMissingPid = new EnrichMissingPid(); - private static final UpdateMatcher enrichMissingProject = new EnrichMissingProject(); - private static final UpdateMatcher enrichMissingPublicationDate = new EnrichMissingPublicationDate(); - private static final UpdateMatcher enrichMissingSubject = new EnrichMissingSubject(); - private static final UpdateMatcher enrichMoreOpenAccess = new EnrichMoreOpenAccess(); - private static final UpdateMatcher enrichMorePid = new EnrichMorePid(); - private static final UpdateMatcher enrichMoreSubject = new EnrichMoreSubject(); + // Simple Matchers + private static final UpdateMatcher enrichMissingAbstract = new EnrichMissingAbstract(); + private static final UpdateMatcher enrichMissingAuthorOrcid = new EnrichMissingAuthorOrcid(); + private static final UpdateMatcher enrichMissingOpenAccess = new EnrichMissingOpenAccess(); + private static final UpdateMatcher enrichMissingPid = new EnrichMissingPid(); + private static final UpdateMatcher enrichMissingPublicationDate = new EnrichMissingPublicationDate(); + private static final UpdateMatcher enrichMissingSubject = new EnrichMissingSubject(); + private static final UpdateMatcher enrichMoreOpenAccess = new EnrichMoreOpenAccess(); + private static final UpdateMatcher enrichMorePid = new EnrichMorePid(); + private static final UpdateMatcher enrichMoreSubject = new EnrichMoreSubject(); + + // Advanced matchers + private static final UpdateMatcher>, ?> enrichMissingProject = new EnrichMissingProject(); + private static final UpdateMatcher>, ?> enrichMoreProject = new EnrichMoreProject(); + + private static final UpdateMatcher>, ?> enrichMissingSoftware = new EnrichMissingSoftware(); + private static final UpdateMatcher>, ?> enrichMoreSoftware = new EnrichMoreSoftware(); + + private static final UpdateMatcher>, ?> enrichMisissingPublicationIsRelatedTo = new EnrichMissingPublicationIsRelatedTo(); + private static final UpdateMatcher>, ?> enrichMissingPublicationIsReferencedBy = new EnrichMissingPublicationIsReferencedBy(); + private static final UpdateMatcher>, ?> enrichMissingPublicationReferences = new EnrichMissingPublicationReferences(); + private static final UpdateMatcher>, ?> enrichMissingPublicationIsSupplementedTo = new EnrichMissingPublicationIsSupplementedTo(); + private static final UpdateMatcher>, ?> enrichMissingPublicationIsSupplementedBy = new EnrichMissingPublicationIsSupplementedBy(); + + private static final UpdateMatcher>, ?> enrichMisissingDatasetIsRelatedTo = new EnrichMissingDatasetIsRelatedTo(); + private static final UpdateMatcher>, ?> enrichMissingDatasetIsReferencedBy = new EnrichMissingDatasetIsReferencedBy(); + private static final UpdateMatcher>, ?> enrichMissingDatasetReferences = new EnrichMissingDatasetReferences(); + private static final UpdateMatcher>, ?> enrichMissingDatasetIsSupplementedTo = new EnrichMissingDatasetIsSupplementedTo(); + private static final UpdateMatcher>, ?> enrichMissingDatasetIsSupplementedBy = new EnrichMissingDatasetIsSupplementedBy(); public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); public static void main(final String[] args) throws Exception { final ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils - .toString(GenerateEventsApplication.class - .getResourceAsStream("/eu/dnetlib/dhp/oa/graph/merge_claims_parameters.json"))); + .toString( + GenerateEventsApplication.class + .getResourceAsStream("/eu/dnetlib/dhp/oa/graph/merge_claims_parameters.json"))); parser.parseArgument(args); final Boolean isSparkSessionManaged = Optional @@ -82,9 +120,6 @@ public class GenerateEventsApplication { final String eventsPath = parser.get("eventsPath"); log.info("eventsPath: {}", eventsPath); - final String resultClassName = parser.get("resultTableName"); - log.info("resultTableName: {}", resultClassName); - final SparkConf conf = new SparkConf(); runWithSparkSession(conf, isSparkSessionManaged, spark -> { @@ -111,17 +146,17 @@ public class GenerateEventsApplication { final String graphPath, final Class resultClazz) { - final Dataset results = - readPath(spark, graphPath + "/" + resultClazz.getSimpleName().toLowerCase(), resultClazz) + final Dataset results = readPath( + spark, graphPath + "/" + resultClazz.getSimpleName().toLowerCase(), resultClazz) .filter(r -> r.getDataInfo().getDeletedbyinference()); - final Dataset rels = - readPath(spark, graphPath + "/relation", Relation.class) - .filter(r -> r.getRelClass().equals("TODO")); // TODO mergedIN + final Dataset rels = readPath(spark, graphPath + "/relation", Relation.class) + .filter(r -> r.getRelClass().equals(BrokerConstants.IS_MERGED_IN_CLASS)); final Column c = null; // TODO - final Dataset aa = results.joinWith(rels, results.col("id").equalTo(rels.col("source")), "inner") + final Dataset aa = results + .joinWith(rels, results.col("id").equalTo(rels.col("source")), "inner") .groupBy(rels.col("target")) .agg(c) .filter(x -> x.size() > 1) @@ -134,7 +169,7 @@ public class GenerateEventsApplication { } - private List generateSimpleEvents(final Result... children) { + private List generateSimpleEvents(final Collection children) { final List> list = new ArrayList<>(); for (final Result target : children) { @@ -142,7 +177,6 @@ public class GenerateEventsApplication { list.addAll(enrichMissingAuthorOrcid.searchUpdatesForRecord(target, children)); list.addAll(enrichMissingOpenAccess.searchUpdatesForRecord(target, children)); list.addAll(enrichMissingPid.searchUpdatesForRecord(target, children)); - list.addAll(enrichMissingProject.searchUpdatesForRecord(target, children)); list.addAll(enrichMissingPublicationDate.searchUpdatesForRecord(target, children)); list.addAll(enrichMissingSubject.searchUpdatesForRecord(target, children)); list.addAll(enrichMoreOpenAccess.searchUpdatesForRecord(target, children)); @@ -153,6 +187,87 @@ public class GenerateEventsApplication { return list.stream().map(EventFactory::newBrokerEvent).collect(Collectors.toList()); } + private List generateProjectsEvents(final Collection>> childrenWithProjects) { + final List> list = new ArrayList<>(); + + for (final Pair> target : childrenWithProjects) { + list.addAll(enrichMissingProject.searchUpdatesForRecord(target, childrenWithProjects)); + list.addAll(enrichMoreProject.searchUpdatesForRecord(target, childrenWithProjects)); + } + + return list.stream().map(EventFactory::newBrokerEvent).collect(Collectors.toList()); + } + + private List generateSoftwareEvents(final Collection>> childrenWithSoftwares) { + final List> list = new ArrayList<>(); + + for (final Pair> target : childrenWithSoftwares) { + list.addAll(enrichMissingSoftware.searchUpdatesForRecord(target, childrenWithSoftwares)); + list.addAll(enrichMoreSoftware.searchUpdatesForRecord(target, childrenWithSoftwares)); + } + return list.stream().map(EventFactory::newBrokerEvent).collect(Collectors.toList()); + } + + private List generatePublicationRelatedEvents(final String relType, + final Collection>>> childrenWithRels) { + + final List> list = new ArrayList<>(); + + final List>> cleanedChildrens = childrenWithRels + .stream() + .filter(p -> p.getRight().containsKey(relType)) + .map(p -> Pair.of(p.getLeft(), p.getRight().get(relType))) + .filter(p -> p.getRight().size() > 0) + .collect(Collectors.toList()); + + for (final Pair> target : cleanedChildrens) { + if (relType.equals("isRelatedTo")) { + list.addAll(enrichMisissingPublicationIsRelatedTo.searchUpdatesForRecord(target, cleanedChildrens)); + } else if (relType.equals("references")) { + list.addAll(enrichMissingPublicationReferences.searchUpdatesForRecord(target, cleanedChildrens)); + } else if (relType.equals("isReferencedBy")) { + list.addAll(enrichMissingPublicationIsReferencedBy.searchUpdatesForRecord(target, cleanedChildrens)); + } else if (relType.equals("isSupplementedTo")) { + list.addAll(enrichMissingPublicationIsSupplementedTo.searchUpdatesForRecord(target, cleanedChildrens)); + } else if (relType.equals("isSupplementedBy")) { + list.addAll(enrichMissingPublicationIsSupplementedBy.searchUpdatesForRecord(target, cleanedChildrens)); + } + } + + return list.stream().map(EventFactory::newBrokerEvent).collect(Collectors.toList()); + + } + + private List generateDatasetRelatedEvents(final String relType, + final Collection>>> childrenWithRels) { + + final List> list = new ArrayList<>(); + + final List>> cleanedChildrens = childrenWithRels + .stream() + .filter(p -> p.getRight().containsKey(relType)) + .map(p -> Pair.of(p.getLeft(), p.getRight().get(relType))) + .filter(p -> p.getRight().size() > 0) + .collect(Collectors.toList()); + + for (final Pair> target : cleanedChildrens) { + if (relType.equals("isRelatedTo")) { + list.addAll(enrichMisissingDatasetIsRelatedTo.searchUpdatesForRecord(target, cleanedChildrens)); + } else if (relType.equals("references")) { + list.addAll(enrichMissingDatasetReferences.searchUpdatesForRecord(target, cleanedChildrens)); + } else if (relType.equals("isReferencedBy")) { + list.addAll(enrichMissingDatasetIsReferencedBy.searchUpdatesForRecord(target, cleanedChildrens)); + } else if (relType.equals("isSupplementedTo")) { + list.addAll(enrichMissingDatasetIsSupplementedTo.searchUpdatesForRecord(target, cleanedChildrens)); + } else if (relType.equals("isSupplementedBy")) { + list.addAll(enrichMissingDatasetIsSupplementedBy.searchUpdatesForRecord(target, cleanedChildrens)); + } + } + + return list.stream().map(EventFactory::newBrokerEvent).collect(Collectors.toList()); + + } + public static Dataset readPath( final SparkSession spark, final String inputPath, diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingProject.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingProject.java deleted file mode 100644 index b6e5b3b57..000000000 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingProject.java +++ /dev/null @@ -1,35 +0,0 @@ - -package eu.dnetlib.dhp.broker.oa.matchers; - -import java.util.Arrays; -import java.util.List; - -import eu.dnetlib.broker.objects.Project; -import eu.dnetlib.dhp.broker.model.Topic; -import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; -import eu.dnetlib.dhp.schema.oaf.Result; - -public class EnrichMissingProject extends UpdateMatcher { - - public EnrichMissingProject() { - super(true); - } - - @Override - protected List> findUpdates(final Result source, final Result target) { - // return Arrays.asList(new EnrichMissingAbstract("xxxxxxx", 0.9f)); - return Arrays.asList(); - } - - @Override - public UpdateInfo generateUpdateInfo(final Project highlightValue, - final Result source, - final Result target) { - return new UpdateInfo<>( - Topic.ENRICH_MISSING_PROJECT, - highlightValue, source, target, - (p, prj) -> p.getProjects().add(prj), - prj -> prj.getFunder() + "::" + prj.getFundingProgram() + prj.getCode()); - } - -} diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcher.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcher.java index d91b03200..5bfe108a5 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcher.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcher.java @@ -12,9 +12,8 @@ import org.apache.commons.lang3.StringUtils; import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; import eu.dnetlib.dhp.schema.oaf.Field; -import eu.dnetlib.dhp.schema.oaf.Result; -public abstract class UpdateMatcher { +public abstract class UpdateMatcher { private final boolean multipleUpdate; @@ -22,15 +21,16 @@ public abstract class UpdateMatcher { this.multipleUpdate = multipleUpdate; } - public Collection> searchUpdatesForRecord(final Result res, final Result... others) { + public Collection> searchUpdatesForRecord(final K res, final Collection others) { final Map> infoMap = new HashMap<>(); - for (final Result source : others) { + for (final K source : others) { if (source != res) { for (final UpdateInfo info : findUpdates(source, res)) { final String s = DigestUtils.md5Hex(info.getHighlightValueAsString()); - if (!infoMap.containsKey(s) || infoMap.get(s).getTrust() < info.getTrust()) {} else { + if (!infoMap.containsKey(s) || infoMap.get(s).getTrust() < info.getTrust()) { + } else { infoMap.put(s, info); } } @@ -51,11 +51,11 @@ public abstract class UpdateMatcher { } } - protected abstract List> findUpdates(Result source, Result target); + protected abstract List> findUpdates(K source, K target); protected abstract UpdateInfo generateUpdateInfo(final T highlightValue, - final Result source, - final Result target); + final K source, + final K target); protected static boolean isMissing(final List> list) { return list == null || list.isEmpty() || StringUtils.isBlank(list.get(0).getValue()); diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/AbstractEnrichMissingDataset.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/AbstractEnrichMissingDataset.java new file mode 100644 index 000000000..321fd4318 --- /dev/null +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/AbstractEnrichMissingDataset.java @@ -0,0 +1,63 @@ + +package eu.dnetlib.dhp.broker.oa.matchers.relatedDatasets; + +import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; + +import org.apache.commons.lang3.tuple.Pair; + +import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher; +import eu.dnetlib.dhp.broker.oa.util.ConversionUtils; +import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; +import eu.dnetlib.dhp.schema.oaf.Dataset; +import eu.dnetlib.dhp.schema.oaf.Result; + +public abstract class AbstractEnrichMissingDataset + extends UpdateMatcher>, eu.dnetlib.broker.objects.Dataset> { + + private final Topic topic; + + public AbstractEnrichMissingDataset(final Topic topic) { + super(true); + this.topic = topic; + } + + @Override + protected final List> findUpdates( + final Pair> source, + final Pair> target) { + + final Set existingDatasets = target + .getRight() + .stream() + .map(Dataset::getId) + .collect(Collectors.toSet()); + + return source + .getRight() + .stream() + .filter(d -> !existingDatasets.contains(d.getId())) + .map(ConversionUtils::oafDatasetToBrokerDataset) + .map(i -> generateUpdateInfo(i, source, target)) + .collect(Collectors.toList()); + + } + + @Override + protected final UpdateInfo generateUpdateInfo( + final eu.dnetlib.broker.objects.Dataset highlightValue, + final Pair> source, + final Pair> target) { + return new UpdateInfo<>( + getTopic(), + highlightValue, source.getLeft(), target.getLeft(), + (p, rel) -> p.getDatasets().add(rel), + rel -> rel.getInstances().get(0).getUrl()); + } + + public Topic getTopic() { + return topic; + } +} diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetIsReferencedBy.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetIsReferencedBy.java new file mode 100644 index 000000000..74ce761f4 --- /dev/null +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetIsReferencedBy.java @@ -0,0 +1,12 @@ + +package eu.dnetlib.dhp.broker.oa.matchers.relatedDatasets; + +import eu.dnetlib.dhp.broker.model.Topic; + +public class EnrichMissingDatasetIsReferencedBy extends AbstractEnrichMissingDataset { + + public EnrichMissingDatasetIsReferencedBy() { + super(Topic.ENRICH_MISSING_DATASET_IS_REFERENCED_BY); + } + +} diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetIsRelatedTo.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetIsRelatedTo.java new file mode 100644 index 000000000..05a891059 --- /dev/null +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetIsRelatedTo.java @@ -0,0 +1,12 @@ + +package eu.dnetlib.dhp.broker.oa.matchers.relatedDatasets; + +import eu.dnetlib.dhp.broker.model.Topic; + +public class EnrichMissingDatasetIsRelatedTo extends AbstractEnrichMissingDataset { + + public EnrichMissingDatasetIsRelatedTo() { + super(Topic.ENRICH_MISSING_DATASET_IS_RELATED_TO); + } + +} diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetIsSupplementedBy.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetIsSupplementedBy.java new file mode 100644 index 000000000..23bd68fa1 --- /dev/null +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetIsSupplementedBy.java @@ -0,0 +1,12 @@ + +package eu.dnetlib.dhp.broker.oa.matchers.relatedDatasets; + +import eu.dnetlib.dhp.broker.model.Topic; + +public class EnrichMissingDatasetIsSupplementedBy extends AbstractEnrichMissingDataset { + + public EnrichMissingDatasetIsSupplementedBy() { + super(Topic.ENRICH_MISSING_DATASET_IS_SUPPLEMENTED_BY); + } + +} diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetIsSupplementedTo.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetIsSupplementedTo.java new file mode 100644 index 000000000..03160b6f0 --- /dev/null +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetIsSupplementedTo.java @@ -0,0 +1,12 @@ + +package eu.dnetlib.dhp.broker.oa.matchers.relatedDatasets; + +import eu.dnetlib.dhp.broker.model.Topic; + +public class EnrichMissingDatasetIsSupplementedTo extends AbstractEnrichMissingDataset { + + public EnrichMissingDatasetIsSupplementedTo() { + super(Topic.ENRICH_MISSING_DATASET_IS_SUPPLEMENTED_TO); + } + +} diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetReferences.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetReferences.java new file mode 100644 index 000000000..bf1df053d --- /dev/null +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetReferences.java @@ -0,0 +1,12 @@ + +package eu.dnetlib.dhp.broker.oa.matchers.relatedDatasets; + +import eu.dnetlib.dhp.broker.model.Topic; + +public class EnrichMissingDatasetReferences extends AbstractEnrichMissingDataset { + + public EnrichMissingDatasetReferences() { + super(Topic.ENRICH_MISSING_DATASET_REFERENCES); + } + +} diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedProjects/EnrichMissingProject.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedProjects/EnrichMissingProject.java new file mode 100644 index 000000000..461266d56 --- /dev/null +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedProjects/EnrichMissingProject.java @@ -0,0 +1,41 @@ + +package eu.dnetlib.dhp.broker.oa.matchers.relatedProjects; + +import java.util.Arrays; +import java.util.List; + +import org.apache.commons.lang3.tuple.Pair; + +import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher; +import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; +import eu.dnetlib.dhp.schema.oaf.Project; +import eu.dnetlib.dhp.schema.oaf.Result; + +public class EnrichMissingProject + extends UpdateMatcher>, eu.dnetlib.broker.objects.Project> { + + public EnrichMissingProject() { + super(true); + } + + @Override + protected List> findUpdates(final Pair> source, + final Pair> target) { + // TODO + return Arrays.asList(); + } + + @Override + public UpdateInfo generateUpdateInfo( + final eu.dnetlib.broker.objects.Project highlightValue, + final Pair> source, + final Pair> target) { + return new UpdateInfo<>( + Topic.ENRICH_MISSING_PROJECT, + highlightValue, source.getLeft(), target.getLeft(), + (p, prj) -> p.getProjects().add(prj), + prj -> prj.getFunder() + "::" + prj.getFundingProgram() + prj.getCode()); + } + +} diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedProjects/EnrichMoreProject.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedProjects/EnrichMoreProject.java new file mode 100644 index 000000000..d9bfb62d5 --- /dev/null +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedProjects/EnrichMoreProject.java @@ -0,0 +1,40 @@ + +package eu.dnetlib.dhp.broker.oa.matchers.relatedProjects; + +import java.util.Arrays; +import java.util.List; + +import org.apache.commons.lang3.tuple.Pair; + +import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher; +import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; +import eu.dnetlib.dhp.schema.oaf.Project; +import eu.dnetlib.dhp.schema.oaf.Result; + +public class EnrichMoreProject extends UpdateMatcher>, eu.dnetlib.broker.objects.Project> { + + public EnrichMoreProject() { + super(true); + } + + @Override + protected List> findUpdates(final Pair> source, + final Pair> target) { + // TODO + return Arrays.asList(); + } + + @Override + public UpdateInfo generateUpdateInfo( + final eu.dnetlib.broker.objects.Project highlightValue, + final Pair> source, + final Pair> target) { + return new UpdateInfo<>( + Topic.ENRICH_MORE_PROJECT, + highlightValue, source.getLeft(), target.getLeft(), + (p, prj) -> p.getProjects().add(prj), + prj -> prj.getFunder() + "::" + prj.getFundingProgram() + prj.getCode()); + } + +} diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/AbstractEnrichMissingPublication.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/AbstractEnrichMissingPublication.java new file mode 100644 index 000000000..75e77b3c6 --- /dev/null +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/AbstractEnrichMissingPublication.java @@ -0,0 +1,63 @@ + +package eu.dnetlib.dhp.broker.oa.matchers.relatedPublications; + +import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; + +import org.apache.commons.lang3.tuple.Pair; + +import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher; +import eu.dnetlib.dhp.broker.oa.util.ConversionUtils; +import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; +import eu.dnetlib.dhp.schema.oaf.Publication; +import eu.dnetlib.dhp.schema.oaf.Result; + +public abstract class AbstractEnrichMissingPublication + extends UpdateMatcher>, eu.dnetlib.broker.objects.Publication> { + + private final Topic topic; + + public AbstractEnrichMissingPublication(final Topic topic) { + super(true); + this.topic = topic; + } + + @Override + protected final List> findUpdates( + final Pair> source, + final Pair> target) { + + final Set existingPublications = target + .getRight() + .stream() + .map(Publication::getId) + .collect(Collectors.toSet()); + + return source + .getRight() + .stream() + .filter(d -> !existingPublications.contains(d.getId())) + .map(ConversionUtils::oafPublicationToBrokerPublication) + .map(i -> generateUpdateInfo(i, source, target)) + .collect(Collectors.toList()); + + } + + @Override + protected final UpdateInfo generateUpdateInfo( + final eu.dnetlib.broker.objects.Publication highlightValue, + final Pair> source, + final Pair> target) { + return new UpdateInfo<>( + getTopic(), + highlightValue, source.getLeft(), target.getLeft(), + (p, rel) -> p.getPublications().add(rel), + rel -> rel.getInstances().get(0).getUrl()); + } + + public Topic getTopic() { + return topic; + } +} diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationIsReferencedBy.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationIsReferencedBy.java new file mode 100644 index 000000000..73fa8a45f --- /dev/null +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationIsReferencedBy.java @@ -0,0 +1,12 @@ + +package eu.dnetlib.dhp.broker.oa.matchers.relatedPublications; + +import eu.dnetlib.dhp.broker.model.Topic; + +public class EnrichMissingPublicationIsReferencedBy extends AbstractEnrichMissingPublication { + + public EnrichMissingPublicationIsReferencedBy() { + super(Topic.ENRICH_MISSING_PUBLICATION_IS_REFERENCED_BY); + } + +} diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationIsRelatedTo.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationIsRelatedTo.java new file mode 100644 index 000000000..361ea3b34 --- /dev/null +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationIsRelatedTo.java @@ -0,0 +1,12 @@ + +package eu.dnetlib.dhp.broker.oa.matchers.relatedPublications; + +import eu.dnetlib.dhp.broker.model.Topic; + +public class EnrichMissingPublicationIsRelatedTo extends AbstractEnrichMissingPublication { + + public EnrichMissingPublicationIsRelatedTo() { + super(Topic.ENRICH_MISSING_PUBLICATION_IS_RELATED_TO); + } + +} diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationIsSupplementedBy.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationIsSupplementedBy.java new file mode 100644 index 000000000..7e8863b1e --- /dev/null +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationIsSupplementedBy.java @@ -0,0 +1,12 @@ + +package eu.dnetlib.dhp.broker.oa.matchers.relatedPublications; + +import eu.dnetlib.dhp.broker.model.Topic; + +public class EnrichMissingPublicationIsSupplementedBy extends AbstractEnrichMissingPublication { + + public EnrichMissingPublicationIsSupplementedBy() { + super(Topic.ENRICH_MISSING_PUBLICATION_IS_SUPPLEMENTED_BY); + } + +} diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationIsSupplementedTo.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationIsSupplementedTo.java new file mode 100644 index 000000000..dc4e51377 --- /dev/null +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationIsSupplementedTo.java @@ -0,0 +1,12 @@ + +package eu.dnetlib.dhp.broker.oa.matchers.relatedPublications; + +import eu.dnetlib.dhp.broker.model.Topic; + +public class EnrichMissingPublicationIsSupplementedTo extends AbstractEnrichMissingPublication { + + public EnrichMissingPublicationIsSupplementedTo() { + super(Topic.ENRICH_MISSING_PUBLICATION_IS_SUPPLEMENTED_TO); + } + +} diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationReferences.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationReferences.java new file mode 100644 index 000000000..5198098bc --- /dev/null +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationReferences.java @@ -0,0 +1,12 @@ + +package eu.dnetlib.dhp.broker.oa.matchers.relatedPublications; + +import eu.dnetlib.dhp.broker.model.Topic; + +public class EnrichMissingPublicationReferences extends AbstractEnrichMissingPublication { + + public EnrichMissingPublicationReferences() { + super(Topic.ENRICH_MISSING_PUBLICATION_REFERENCES); + } + +} diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingAbstract.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingAbstract.java similarity index 79% rename from dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingAbstract.java rename to dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingAbstract.java index 43cf738f8..a418b633e 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingAbstract.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingAbstract.java @@ -1,15 +1,16 @@ -package eu.dnetlib.dhp.broker.oa.matchers; +package eu.dnetlib.dhp.broker.oa.matchers.simple; import java.util.ArrayList; import java.util.Arrays; import java.util.List; import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher; import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; import eu.dnetlib.dhp.schema.oaf.Result; -public class EnrichMissingAbstract extends UpdateMatcher { +public class EnrichMissingAbstract extends UpdateMatcher { public EnrichMissingAbstract() { super(false); @@ -24,7 +25,8 @@ public class EnrichMissingAbstract extends UpdateMatcher { } @Override - public UpdateInfo generateUpdateInfo(final String highlightValue, final Result source, + public UpdateInfo generateUpdateInfo(final String highlightValue, + final Result source, final Result target) { return new UpdateInfo<>( Topic.ENRICH_MISSING_ABSTRACT, diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingAuthorOrcid.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingAuthorOrcid.java similarity index 77% rename from dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingAuthorOrcid.java rename to dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingAuthorOrcid.java index beeccdbe8..b5c2f7e72 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingAuthorOrcid.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingAuthorOrcid.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.broker.oa.matchers; +package eu.dnetlib.dhp.broker.oa.matchers.simple; import java.util.Arrays; import java.util.List; @@ -7,10 +7,11 @@ import java.util.List; import org.apache.commons.lang3.tuple.Pair; import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher; import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; import eu.dnetlib.dhp.schema.oaf.Result; -public class EnrichMissingAuthorOrcid extends UpdateMatcher> { +public class EnrichMissingAuthorOrcid extends UpdateMatcher> { public EnrichMissingAuthorOrcid() { super(true); @@ -24,7 +25,8 @@ public class EnrichMissingAuthorOrcid extends UpdateMatcher @Override public UpdateInfo> generateUpdateInfo(final Pair highlightValue, - final Result source, final Result target) { + final Result source, + final Result target) { return new UpdateInfo<>( Topic.ENRICH_MISSING_AUTHOR_ORCID, highlightValue, source, target, diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingOpenAccess.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingOpenAccess.java similarity index 88% rename from dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingOpenAccess.java rename to dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingOpenAccess.java index a4a2ea0c6..c7e9dcbc1 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingOpenAccess.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingOpenAccess.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.broker.oa.matchers; +package eu.dnetlib.dhp.broker.oa.matchers.simple; import java.util.Arrays; import java.util.List; @@ -7,12 +7,13 @@ import java.util.stream.Collectors; import eu.dnetlib.broker.objects.Instance; import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher; import eu.dnetlib.dhp.broker.oa.util.BrokerConstants; import eu.dnetlib.dhp.broker.oa.util.ConversionUtils; import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; import eu.dnetlib.dhp.schema.oaf.Result; -public class EnrichMissingOpenAccess extends UpdateMatcher { +public class EnrichMissingOpenAccess extends UpdateMatcher { public EnrichMissingOpenAccess() { super(true); diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPid.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPid.java similarity index 85% rename from dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPid.java rename to dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPid.java index a8df62541..522d46d40 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPid.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPid.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.broker.oa.matchers; +package eu.dnetlib.dhp.broker.oa.matchers.simple; import java.util.Arrays; import java.util.List; @@ -7,11 +7,12 @@ import java.util.stream.Collectors; import eu.dnetlib.broker.objects.Pid; import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher; import eu.dnetlib.dhp.broker.oa.util.ConversionUtils; import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; import eu.dnetlib.dhp.schema.oaf.Result; -public class EnrichMissingPid extends UpdateMatcher { +public class EnrichMissingPid extends UpdateMatcher { public EnrichMissingPid() { super(true); diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPublicationDate.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPublicationDate.java similarity index 88% rename from dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPublicationDate.java rename to dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPublicationDate.java index 372a4e4c9..197ace97c 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPublicationDate.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPublicationDate.java @@ -1,15 +1,16 @@ -package eu.dnetlib.dhp.broker.oa.matchers; +package eu.dnetlib.dhp.broker.oa.matchers.simple; import java.util.ArrayList; import java.util.Arrays; import java.util.List; import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher; import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; import eu.dnetlib.dhp.schema.oaf.Result; -public class EnrichMissingPublicationDate extends UpdateMatcher { +public class EnrichMissingPublicationDate extends UpdateMatcher { public EnrichMissingPublicationDate() { super(false); diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingSoftware.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingSoftware.java new file mode 100644 index 000000000..4fcba43a4 --- /dev/null +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingSoftware.java @@ -0,0 +1,42 @@ + +package eu.dnetlib.dhp.broker.oa.matchers.simple; + +import java.util.Arrays; +import java.util.List; + +import org.apache.commons.lang3.tuple.Pair; + +import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher; +import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; +import eu.dnetlib.dhp.schema.oaf.Result; +import eu.dnetlib.dhp.schema.oaf.Software; + +public class EnrichMissingSoftware + extends UpdateMatcher>, eu.dnetlib.broker.objects.Software> { + + public EnrichMissingSoftware() { + super(true); + } + + @Override + protected List> findUpdates( + final Pair> source, + final Pair> target) { + // TODO + return Arrays.asList(); + } + + @Override + public UpdateInfo generateUpdateInfo( + final eu.dnetlib.broker.objects.Software highlightValue, + final Pair> source, + final Pair> target) { + return new UpdateInfo<>( + Topic.ENRICH_MISSING_SOFTWARE, + highlightValue, source.getLeft(), target.getLeft(), + (p, s) -> p.getSoftwares().add(s), + s -> s.getName()); + } + +} diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingSubject.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingSubject.java similarity index 88% rename from dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingSubject.java rename to dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingSubject.java index 79e9d469b..290bad48b 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingSubject.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingSubject.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.broker.oa.matchers; +package eu.dnetlib.dhp.broker.oa.matchers.simple; import java.util.List; import java.util.Set; @@ -8,13 +8,14 @@ import java.util.stream.Collectors; import org.apache.commons.lang3.tuple.Pair; import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher; import eu.dnetlib.dhp.broker.oa.util.ConversionUtils; import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; import eu.dnetlib.dhp.schema.oaf.Qualifier; import eu.dnetlib.dhp.schema.oaf.Result; import eu.dnetlib.dhp.schema.oaf.StructuredProperty; -public class EnrichMissingSubject extends UpdateMatcher> { +public class EnrichMissingSubject extends UpdateMatcher> { public EnrichMissingSubject() { super(true); diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMoreOpenAccess.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMoreOpenAccess.java similarity index 88% rename from dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMoreOpenAccess.java rename to dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMoreOpenAccess.java index 40c9b0500..c376da44d 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMoreOpenAccess.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMoreOpenAccess.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.broker.oa.matchers; +package eu.dnetlib.dhp.broker.oa.matchers.simple; import java.util.List; import java.util.Set; @@ -7,12 +7,13 @@ import java.util.stream.Collectors; import eu.dnetlib.broker.objects.Instance; import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher; import eu.dnetlib.dhp.broker.oa.util.BrokerConstants; import eu.dnetlib.dhp.broker.oa.util.ConversionUtils; import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; import eu.dnetlib.dhp.schema.oaf.Result; -public class EnrichMoreOpenAccess extends UpdateMatcher { +public class EnrichMoreOpenAccess extends UpdateMatcher { public EnrichMoreOpenAccess() { super(true); diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMorePid.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMorePid.java similarity index 87% rename from dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMorePid.java rename to dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMorePid.java index 0e7b7766a..2ee327c83 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMorePid.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMorePid.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.broker.oa.matchers; +package eu.dnetlib.dhp.broker.oa.matchers.simple; import java.util.List; import java.util.Set; @@ -7,11 +7,12 @@ import java.util.stream.Collectors; import eu.dnetlib.broker.objects.Pid; import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher; import eu.dnetlib.dhp.broker.oa.util.ConversionUtils; import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; import eu.dnetlib.dhp.schema.oaf.Result; -public class EnrichMorePid extends UpdateMatcher { +public class EnrichMorePid extends UpdateMatcher { public EnrichMorePid() { super(true); diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMoreSoftware.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMoreSoftware.java new file mode 100644 index 000000000..a1affff62 --- /dev/null +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMoreSoftware.java @@ -0,0 +1,42 @@ + +package eu.dnetlib.dhp.broker.oa.matchers.simple; + +import java.util.Arrays; +import java.util.List; + +import org.apache.commons.lang3.tuple.Pair; + +import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher; +import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; +import eu.dnetlib.dhp.schema.oaf.Result; +import eu.dnetlib.dhp.schema.oaf.Software; + +public class EnrichMoreSoftware + extends UpdateMatcher>, eu.dnetlib.broker.objects.Software> { + + public EnrichMoreSoftware() { + super(true); + } + + @Override + protected List> findUpdates( + final Pair> source, + final Pair> target) { + // TODO + return Arrays.asList(); + } + + @Override + public UpdateInfo generateUpdateInfo( + final eu.dnetlib.broker.objects.Software highlightValue, + final Pair> source, + final Pair> target) { + return new UpdateInfo<>( + Topic.ENRICH_MORE_SOFTWARE, + highlightValue, source.getLeft(), target.getLeft(), + (p, s) -> p.getSoftwares().add(s), + s -> s.getName()); + } + +} diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMoreSubject.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMoreSubject.java similarity index 87% rename from dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMoreSubject.java rename to dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMoreSubject.java index e6374479b..b38445e88 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMoreSubject.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMoreSubject.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.broker.oa.matchers; +package eu.dnetlib.dhp.broker.oa.matchers.simple; import java.util.List; import java.util.Set; @@ -8,11 +8,12 @@ import java.util.stream.Collectors; import org.apache.commons.lang3.tuple.Pair; import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher; import eu.dnetlib.dhp.broker.oa.util.ConversionUtils; import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; import eu.dnetlib.dhp.schema.oaf.Result; -public class EnrichMoreSubject extends UpdateMatcher> { +public class EnrichMoreSubject extends UpdateMatcher> { public EnrichMoreSubject() { super(true); diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/BrokerConstants.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/BrokerConstants.java index d61d5bfb7..8e97192bf 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/BrokerConstants.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/BrokerConstants.java @@ -4,4 +4,6 @@ package eu.dnetlib.dhp.broker.oa.util; public class BrokerConstants { public final static String OPEN_ACCESS = "OPEN"; + public final static String IS_MERGED_IN_CLASS = "isMergedIn"; + } diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ConversionUtils.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ConversionUtils.java index 2e2ce202a..2f87d0ee7 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ConversionUtils.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ConversionUtils.java @@ -7,6 +7,8 @@ import org.apache.commons.lang3.tuple.Pair; import eu.dnetlib.broker.objects.Instance; import eu.dnetlib.broker.objects.Pid; +import eu.dnetlib.dhp.schema.oaf.Dataset; +import eu.dnetlib.dhp.schema.oaf.Publication; import eu.dnetlib.dhp.schema.oaf.StructuredProperty; public class ConversionUtils { @@ -33,4 +35,15 @@ public class ConversionUtils { return Pair.of(sp.getQualifier().getClassid(), sp.getValue()); } + public static final eu.dnetlib.broker.objects.Dataset oafDatasetToBrokerDataset(final Dataset d) { + final eu.dnetlib.broker.objects.Dataset res = new eu.dnetlib.broker.objects.Dataset(); + // TODO + return res; + } + + public static final eu.dnetlib.broker.objects.Publication oafPublicationToBrokerPublication(final Publication d) { + final eu.dnetlib.broker.objects.Publication res = new eu.dnetlib.broker.objects.Publication(); + // TODO + return res; + } } diff --git a/dhp-workflows/dhp-dedup-openaire/pom.xml b/dhp-workflows/dhp-dedup-openaire/pom.xml index fcc356ac0..44cf9e67c 100644 --- a/dhp-workflows/dhp-dedup-openaire/pom.xml +++ b/dhp-workflows/dhp-dedup-openaire/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.1-SNAPSHOT + 1.2.2-SNAPSHOT 4.0.0 dhp-dedup-openaire diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/AuthorMerger.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/AuthorMerger.java index 108f4a4be..ee5fd5165 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/AuthorMerger.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/AuthorMerger.java @@ -1,159 +1,170 @@ -package eu.dnetlib.dhp.oa.dedup; -import com.wcohen.ss.JaroWinkler; -import eu.dnetlib.dhp.schema.oaf.Author; -import eu.dnetlib.dhp.schema.oaf.StructuredProperty; -import eu.dnetlib.pace.model.Person; -import org.apache.commons.lang3.StringUtils; -import scala.Tuple2; +package eu.dnetlib.dhp.oa.dedup; import java.text.Normalizer; import java.util.*; import java.util.stream.Collectors; +import org.apache.commons.lang3.StringUtils; + +import com.wcohen.ss.JaroWinkler; + +import eu.dnetlib.dhp.schema.oaf.Author; +import eu.dnetlib.dhp.schema.oaf.StructuredProperty; +import eu.dnetlib.pace.model.Person; +import scala.Tuple2; + public class AuthorMerger { - private static final Double THRESHOLD = 0.95; + private static final Double THRESHOLD = 0.95; - public static List merge(List> authors){ + public static List merge(List> authors) { - authors.sort(new Comparator>() { - @Override - public int compare(List o1, List o2) { - return -Integer.compare(countAuthorsPids(o1), countAuthorsPids(o2)); - } - }); + authors.sort((o1, o2) -> -Integer.compare(countAuthorsPids(o1), countAuthorsPids(o2))); - List author = new ArrayList<>(); + List author = new ArrayList<>(); - for(List a : authors){ - author = mergeAuthor(author, a); - } + for (List a : authors) { + author = mergeAuthor(author, a); + } - return author; + return author; - } + } - public static List mergeAuthor(final List a, final List b) { - int pa = countAuthorsPids(a); - int pb = countAuthorsPids(b); - List base, enrich; - int sa = authorsSize(a); - int sb = authorsSize(b); + public static List mergeAuthor(final List a, final List b) { + int pa = countAuthorsPids(a); + int pb = countAuthorsPids(b); + List base, enrich; + int sa = authorsSize(a); + int sb = authorsSize(b); - if (pa == pb) { - base = sa > sb ? a : b; - enrich = sa > sb ? b : a; - } else { - base = pa > pb ? a : b; - enrich = pa > pb ? b : a; - } - enrichPidFromList(base, enrich); - return base; - } + if (pa == pb) { + base = sa > sb ? a : b; + enrich = sa > sb ? b : a; + } else { + base = pa > pb ? a : b; + enrich = pa > pb ? b : a; + } + enrichPidFromList(base, enrich); + return base; + } - private static void enrichPidFromList(List base, List enrich) { - if (base == null || enrich == null) - return; - final Map basePidAuthorMap = base - .stream() - .filter(a -> a.getPid() != null && a.getPid().size() > 0) - .flatMap( - a -> a - .getPid() - .stream() - .map(p -> new Tuple2<>(pidToComparableString(p), a))) - .collect(Collectors.toMap(Tuple2::_1, Tuple2::_2, (x1, x2) -> x1)); + private static void enrichPidFromList(List base, List enrich) { + if (base == null || enrich == null) + return; + final Map basePidAuthorMap = base + .stream() + .filter(a -> a.getPid() != null && a.getPid().size() > 0) + .flatMap( + a -> a + .getPid() + .stream() + .map(p -> new Tuple2<>(pidToComparableString(p), a))) + .collect(Collectors.toMap(Tuple2::_1, Tuple2::_2, (x1, x2) -> x1)); - final List> pidToEnrich = enrich - .stream() - .filter(a -> a.getPid() != null && a.getPid().size() > 0) - .flatMap( - a -> a - .getPid() - .stream() - .filter(p -> !basePidAuthorMap.containsKey(pidToComparableString(p))) - .map(p -> new Tuple2<>(p, a))) - .collect(Collectors.toList()); + final List> pidToEnrich = enrich + .stream() + .filter(a -> a.getPid() != null && a.getPid().size() > 0) + .flatMap( + a -> a + .getPid() + .stream() + .filter(p -> !basePidAuthorMap.containsKey(pidToComparableString(p))) + .map(p -> new Tuple2<>(p, a))) + .collect(Collectors.toList()); - pidToEnrich - .forEach( - a -> { - Optional> simAuthor = base - .stream() - .map(ba -> new Tuple2<>(sim(ba, a._2()), ba)) - .max(Comparator.comparing(Tuple2::_1)); - if (simAuthor.isPresent() && simAuthor.get()._1() > THRESHOLD) { - Author r = simAuthor.get()._2(); - if (r.getPid() == null) { - r.setPid(new ArrayList<>()); - } - r.getPid().add(a._1()); - } - }); - } + pidToEnrich + .forEach( + a -> { + Optional> simAuthor = base + .stream() + .map(ba -> new Tuple2<>(sim(ba, a._2()), ba)) + .max(Comparator.comparing(Tuple2::_1)); - public static String pidToComparableString(StructuredProperty pid){ - return (pid.getQualifier()!=null? pid.getQualifier().getClassid()!=null?pid.getQualifier().getClassid().toLowerCase():"":"") + (pid.getValue()!=null? pid.getValue().toLowerCase():""); - } + if (simAuthor.isPresent()) { + double th = THRESHOLD; + // increase the threshold if the surname is too short + if (simAuthor.get()._2().getSurname() != null + && simAuthor.get()._2().getSurname().length() <= 3) + th = 0.99; - public static int countAuthorsPids(List authors) { - if (authors == null) - return 0; + if (simAuthor.get()._1() > th) { + Author r = simAuthor.get()._2(); + if (r.getPid() == null) { + r.setPid(new ArrayList<>()); + } + r.getPid().add(a._1()); + } + } + }); + } - return (int) authors.stream().filter(AuthorMerger::hasPid).count(); - } + public static String pidToComparableString(StructuredProperty pid) { + return (pid.getQualifier() != null + ? pid.getQualifier().getClassid() != null ? pid.getQualifier().getClassid().toLowerCase() : "" + : "") + + (pid.getValue() != null ? pid.getValue().toLowerCase() : ""); + } - private static int authorsSize(List authors) { - if (authors == null) - return 0; - return authors.size(); - } + public static int countAuthorsPids(List authors) { + if (authors == null) + return 0; - private static Double sim(Author a, Author b) { + return (int) authors.stream().filter(AuthorMerger::hasPid).count(); + } - final Person pa = parse(a); - final Person pb = parse(b); + private static int authorsSize(List authors) { + if (authors == null) + return 0; + return authors.size(); + } - if (pa.isAccurate() & pb.isAccurate()) { - return new JaroWinkler() - .score(normalize(pa.getSurnameString()), normalize(pb.getSurnameString())); - } else { - return new JaroWinkler() - .score(normalize(pa.getNormalisedFullname()), normalize(pb.getNormalisedFullname())); - } - } + private static Double sim(Author a, Author b) { - private static boolean hasPid(Author a) { - if (a == null || a.getPid() == null || a.getPid().size() == 0) - return false; - return a.getPid().stream().anyMatch(p -> p != null && StringUtils.isNotBlank(p.getValue())); - } + final Person pa = parse(a); + final Person pb = parse(b); - private static Person parse(Author author) { - if (StringUtils.isNotBlank(author.getSurname())) { - return new Person(author.getSurname() + ", " + author.getName(), false); - } else { - return new Person(author.getFullname(), false); - } - } + // if both are accurate (e.g. they have name and surname) + if (pa.isAccurate() & pb.isAccurate()) { + return new JaroWinkler().score(normalize(pa.getSurnameString()), normalize(pb.getSurnameString())) * 0.5 + + new JaroWinkler().score(normalize(pa.getNameString()), normalize(pb.getNameString())) * 0.5; + } else { + return new JaroWinkler() + .score(normalize(pa.getNormalisedFullname()), normalize(pb.getNormalisedFullname())); + } + } - private static String normalize(final String s) { - return nfd(s) - .toLowerCase() - // do not compact the regexes in a single expression, would cause StackOverflowError - // in case - // of large input strings - .replaceAll("(\\W)+", " ") - .replaceAll("(\\p{InCombiningDiacriticalMarks})+", " ") - .replaceAll("(\\p{Punct})+", " ") - .replaceAll("(\\d)+", " ") - .replaceAll("(\\n)+", " ") - .trim(); - } + private static boolean hasPid(Author a) { + if (a == null || a.getPid() == null || a.getPid().size() == 0) + return false; + return a.getPid().stream().anyMatch(p -> p != null && StringUtils.isNotBlank(p.getValue())); + } - private static String nfd(final String s) { - return Normalizer.normalize(s, Normalizer.Form.NFD); - } + private static Person parse(Author author) { + if (StringUtils.isNotBlank(author.getSurname())) { + return new Person(author.getSurname() + ", " + author.getName(), false); + } else { + return new Person(author.getFullname(), false); + } + } + + private static String normalize(final String s) { + return nfd(s) + .toLowerCase() + // do not compact the regexes in a single expression, would cause StackOverflowError + // in case + // of large input strings + .replaceAll("(\\W)+", " ") + .replaceAll("(\\p{InCombiningDiacriticalMarks})+", " ") + .replaceAll("(\\p{Punct})+", " ") + .replaceAll("(\\d)+", " ") + .replaceAll("(\\n)+", " ") + .trim(); + } + + private static String nfd(final String s) { + return Normalizer.normalize(s, Normalizer.Form.NFD); + } } diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java index eed783e53..8028d5a94 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java @@ -1,3 +1,4 @@ + package eu.dnetlib.dhp.oa.dedup; import java.io.Serializable; @@ -73,7 +74,8 @@ public class DedupRecordFactory { } public static T entityMerger( - String id, Iterator> entities, long ts, DataInfo dataInfo, Class clazz) throws IllegalAccessException, InstantiationException { + String id, Iterator> entities, long ts, DataInfo dataInfo, Class clazz) + throws IllegalAccessException, InstantiationException { T entity = clazz.newInstance(); @@ -87,14 +89,14 @@ public class DedupRecordFactory { entity.mergeFrom(duplicate); if (ModelSupport.isSubClass(duplicate, Result.class)) { Result r1 = (Result) duplicate; - if (r1.getAuthor() != null && r1.getAuthor().size()>0) + if (r1.getAuthor() != null && r1.getAuthor().size() > 0) authors.add(r1.getAuthor()); if (r1.getDateofacceptance() != null) dates.add(r1.getDateofacceptance().getValue()); } }); - //set authors and date + // set authors and date if (ModelSupport.isSubClass(entity, Result.class)) { ((Result) entity).setDateofacceptance(DatePicker.pick(dates)); ((Result) entity).setAuthor(AuthorMerger.merge(authors)); diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/EntityMergerTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/EntityMergerTest.java index 526dd73bb..b8ccb038d 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/EntityMergerTest.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/EntityMergerTest.java @@ -1,5 +1,8 @@ + package eu.dnetlib.dhp.oa.dedup; +import static org.junit.jupiter.api.Assertions.assertEquals; + import java.io.BufferedReader; import java.io.FileReader; import java.io.IOException; @@ -7,19 +10,18 @@ import java.io.Serializable; import java.nio.file.Paths; import java.util.*; -import eu.dnetlib.dhp.schema.oaf.*; -import eu.dnetlib.pace.util.MapDocumentUtil; import org.codehaus.jackson.map.ObjectMapper; import org.junit.jupiter.api.BeforeEach; - import org.junit.jupiter.api.Test; -import scala.Tuple2; -import static org.junit.jupiter.api.Assertions.assertEquals; +import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.pace.util.MapDocumentUtil; +import scala.Tuple2; public class EntityMergerTest implements Serializable { List> publications; + List> publications2; String testEntityBasePath; DataInfo dataInfo; @@ -30,11 +32,12 @@ public class EntityMergerTest implements Serializable { public void setUp() throws Exception { testEntityBasePath = Paths - .get(SparkDedupTest.class.getResource("/eu/dnetlib/dhp/dedup/json").toURI()) - .toFile() - .getAbsolutePath(); + .get(SparkDedupTest.class.getResource("/eu/dnetlib/dhp/dedup/json").toURI()) + .toFile() + .getAbsolutePath(); publications = readSample(testEntityBasePath + "/publication_merge.json", Publication.class); + publications2 = readSample(testEntityBasePath + "/publication_merge2.json", Publication.class); pub_top = getTopPub(publications); @@ -45,7 +48,8 @@ public class EntityMergerTest implements Serializable { @Test public void publicationMergerTest() throws InstantiationException, IllegalAccessException { - Publication pub_merged = DedupRecordFactory.entityMerger(dedupId, publications.iterator(), 0, dataInfo, Publication.class); + Publication pub_merged = DedupRecordFactory + .entityMerger(dedupId, publications.iterator(), 0, dataInfo, Publication.class); assertEquals(dedupId, pub_merged.getId()); @@ -59,36 +63,55 @@ public class EntityMergerTest implements Serializable { assertEquals(pub_merged.getDateoftransformation(), pub_top.getDateoftransformation()); assertEquals(pub_merged.getOaiprovenance(), pub_top.getOaiprovenance()); assertEquals(pub_merged.getDateofcollection(), pub_top.getDateofcollection()); - assertEquals(pub_merged.getInstance().size(),3); + assertEquals(pub_merged.getInstance().size(), 3); assertEquals(pub_merged.getCountry().size(), 2); assertEquals(pub_merged.getSubject().size(), 0); assertEquals(pub_merged.getTitle().size(), 2); - assertEquals(pub_merged.getRelevantdate().size(),0); - assertEquals(pub_merged.getDescription().size(),0); - assertEquals(pub_merged.getSource().size(),0); - assertEquals(pub_merged.getFulltext().size(),0); - assertEquals(pub_merged.getFormat().size(),0); - assertEquals(pub_merged.getContributor().size(),0); - assertEquals(pub_merged.getCoverage().size(),0); - assertEquals(pub_merged.getContext().size(),0); - assertEquals(pub_merged.getExternalReference().size(),0); - assertEquals(pub_merged.getOriginalId().size(),3); - assertEquals(pub_merged.getCollectedfrom().size(),3); - assertEquals(pub_merged.getPid().size(),1); - assertEquals(pub_merged.getExtraInfo().size(),0); + assertEquals(pub_merged.getRelevantdate().size(), 0); + assertEquals(pub_merged.getDescription().size(), 0); + assertEquals(pub_merged.getSource().size(), 0); + assertEquals(pub_merged.getFulltext().size(), 0); + assertEquals(pub_merged.getFormat().size(), 0); + assertEquals(pub_merged.getContributor().size(), 0); + assertEquals(pub_merged.getCoverage().size(), 0); + assertEquals(pub_merged.getContext().size(), 0); + assertEquals(pub_merged.getExternalReference().size(), 0); + assertEquals(pub_merged.getOriginalId().size(), 3); + assertEquals(pub_merged.getCollectedfrom().size(), 3); + assertEquals(pub_merged.getPid().size(), 1); + assertEquals(pub_merged.getExtraInfo().size(), 0); - //verify datainfo + // verify datainfo assertEquals(pub_merged.getDataInfo(), dataInfo); - //verify datepicker + // verify datepicker assertEquals(pub_merged.getDateofacceptance().getValue(), "2018-09-30"); - //verify authors + // verify authors assertEquals(pub_merged.getAuthor().size(), 9); assertEquals(AuthorMerger.countAuthorsPids(pub_merged.getAuthor()), 4); + + // verify title + int count = 0; + for (StructuredProperty title : pub_merged.getTitle()) { + if (title.getQualifier().getClassid().equals("main title")) + count++; + } + assertEquals(count, 1); } - public DataInfo setDI(){ + @Test + public void publicationMergerTest2() throws InstantiationException, IllegalAccessException { + + Publication pub_merged = DedupRecordFactory + .entityMerger(dedupId, publications2.iterator(), 0, dataInfo, Publication.class); + + assertEquals(pub_merged.getAuthor().size(), 27); + // insert assertions here + + } + + public DataInfo setDI() { DataInfo dataInfo = new DataInfo(); dataInfo.setTrust("0.9"); dataInfo.setDeletedbyinference(false); @@ -97,13 +120,13 @@ public class EntityMergerTest implements Serializable { return dataInfo; } - public Publication getTopPub(List> publications){ + public Publication getTopPub(List> publications) { Double maxTrust = 0.0; Publication maxPub = new Publication(); for (Tuple2 publication : publications) { Double pubTrust = Double.parseDouble(publication._2().getDataInfo().getTrust()); - if(pubTrust > maxTrust){ + if (pubTrust > maxTrust) { maxTrust = pubTrust; maxPub = publication._2(); } @@ -118,11 +141,11 @@ public class EntityMergerTest implements Serializable { reader = new BufferedReader(new FileReader(path)); String line = reader.readLine(); while (line != null) { - res.add( + res + .add( new Tuple2<>( - MapDocumentUtil.getJPathString("$.id", line), - new ObjectMapper().readValue(line, clazz)) - ); + MapDocumentUtil.getJPathString("$.id", line), + new ObjectMapper().readValue(line, clazz))); // read next line line = reader.readLine(); } @@ -134,5 +157,4 @@ public class EntityMergerTest implements Serializable { return res; } - } diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/json/publication_merge.json b/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/json/publication_merge.json index 015f9294a..28548c532 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/json/publication_merge.json +++ b/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/json/publication_merge.json @@ -1,3 +1,3 @@ {"context": [], "dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:datasetarchive", "classname": "sysimport:crosswalk:datasetarchive", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "dedup-similarity-result-levenstein", "invisible": false, "trust": "0.95"}, "resourcetype": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "pid": [], "contributor": [], "resulttype": {"classid": "publication", "classname": "publication", "schemename": "dnet:result_typologies", "schemeid": "dnet:result_typologies"}, "relevantdate": [], "collectedfrom": [{"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "Journal.fi", "key": "10|openaire____::6eef8049d0feedc089ee009abca55e35"}], "id": "50|a89337edbe55::4930db9e954866d70916cbfba9f81f97", "subject": [], "instance": [{"refereed": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "hostedby": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "Journal.fi", "key": "10|openaire____::6eef8049d0feedc089ee009abca55e35"}, "processingchargeamount": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "license": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "url": [], "distributionlocation": "", "processingchargecurrency": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "dateofacceptance": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "2016-01-01"}, "collectedfrom": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "Journal.fi", "key": "10|openaire____::6eef8049d0feedc089ee009abca55e35"}, "accessright": {"classid": "OPEN", "classname": "Open Access", "schemename": "dnet:access_modes", "schemeid": "dnet:access_modes"}, "instancetype": {"classid": "0001", "classname": "Article", "schemename": "dnet:dataCite_resource", "schemeid": "dnet:dataCite_resource"}}], "embargoenddate": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "lastupdatetimestamp": 0, "author": [{"surname": "Go\\u0308tz", "name": "Julia", "pid": [], "rank": 5, "affiliation": [], "fullname": "G\\u00f6tz, Julia"}, {"surname": "Wolff", "name": "Stephan", "pid": [], "rank": 6, "affiliation": [], "fullname": "Wolff, Stephan"}, {"surname": "Jansen", "name": "Olav", "pid": [], "rank": 7, "affiliation": [], "fullname": "Jansen, Olav"}, {"surname": "Dressler", "name": "Dirk", "pid": [{"qualifier": {"classid": "ORCID", "classname": "ORCID"}, "value": "0000-0000-0656-9999"},{"qualifier": {"classid": "id", "classname": "id"}, "value": "987654321"}], "rank": 8, "affiliation": [], "fullname": "Dressler, Dirk"}, {"surname": "Schneider", "name": "Susanne A.", "pid": [], "rank": 9, "affiliation": [], "fullname": "Schneider, Susanne A."}], "source": [], "dateofcollection": "2019-11-05T14:49:22.351Z", "fulltext": [], "dateoftransformation": "2019-11-05T16:10:58.988Z", "description": [], "format": [], "journal": {"issnPrinted": "1459-6067", "conferencedate": "", "conferenceplace": "", "name": "Agricultural and Food Science", "edition": "", "iss": "3", "sp": "", "vol": "27", "dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "issnOnline": "1795-1895", "ep": "", "issnLinking": ""}, "coverage": [], "publisher": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "language": {"classid": "eng", "classname": "English", "schemename": "dnet:languages", "schemeid": "dnet:languages"}, "bestaccessright": {"classid": "OPEN", "classname": "Open Access", "schemename": "dnet:access_modes", "schemeid": "dnet:access_modes"}, "country": [], "extraInfo": [], "originalId": [], "dateofacceptance": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "2018-09-30"}, "title": [{"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "qualifier": {"classid": "main title", "classname": "main title", "schemename": "dnet:dataCite_title", "schemeid": "dnet:dataCite_title"}, "value": "Altered brain activation in a reversal learning task unmasks adaptive changes in cognitive control in writer's cramp"}]} {"context": [], "dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:repository", "classname": "sysimport:crosswalk:repository", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": true, "inferenceprovenance": "dedup-similarity-result-levenstein", "invisible": false, "trust": "0.9"}, "resourcetype": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "pid": [{"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "qualifier": {"classid": "doi", "classname": "doi", "schemename": "dnet:pid_types", "schemeid": "dnet:pid_types"}, "value": "10.1016/j.nicl.2015.11.006"}], "contributor": [], "resulttype": {"classid": "publication", "classname": "publication", "schemename": "dnet:result_typologies", "schemeid": "dnet:result_typologies"}, "relevantdate": [], "collectedfrom": [{"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "BASE (Open Access Aggregator)", "key": "10|openaire____::df45502607927471ecf8a6ae83683ff5"}], "id": "50|base_oa_____::0968af610a356656706657e4f234b340", "subject": [], "instance": [{"refereed": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "hostedby": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "NeuroImage: Clinical", "key": "10|doajarticles::0c0e74daa5d95504eade9c81ebbd5b8a"}, "processingchargeamount": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "license": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "http://creativecommons.org/licenses/by-nc-nd/4.0/"}, "url": ["http://dx.doi.org/10.1016/j.nicl.2015.11.006"], "distributionlocation": "", "processingchargecurrency": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "dateofacceptance": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "2016-01-01"}, "collectedfrom": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "BASE (Open Access Aggregator)", "key": "10|openaire____::df45502607927471ecf8a6ae83683ff5"}, "accessright": {"classid": "OPEN", "classname": "Open Access", "schemename": "dnet:access_modes", "schemeid": "dnet:access_modes"}, "instancetype": {"classid": "0001", "classname": "Article", "schemename": "dnet:publication_resource", "schemeid": "dnet:publication_resource"}}], "embargoenddate": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "lastupdatetimestamp": 0, "author": [{"surname": "Zeuner", "name": "Kirsten E.", "pid": [], "rank": 1, "affiliation": [], "fullname": "Zeuner, Kirsten E."}, {"surname": "Knutzen", "name": "Arne", "pid": [], "rank": 2, "affiliation": [], "fullname": "Knutzen, Arne"}, {"surname": "Granert", "name": "Oliver", "pid": [{"qualifier": {"classid": "ORCID", "classname": "ORCID"}, "value": "0000-0002-0656-1023"}], "rank": 3, "affiliation": [], "fullname": "Granert, Oliver"}, {"surname": "Sablowsky", "name": "Simone", "pid": [], "rank": 4, "affiliation": [], "fullname": "Sablowsky, Simone"}, {"surname": "Go\\u0308tz", "name": "Julia", "pid": [], "rank": 5, "affiliation": [], "fullname": "G\\u00f6tz, Julia"}, {"surname": "Wolff", "name": "Stephan", "pid": [], "rank": 6, "affiliation": [], "fullname": "Wolff, Stephan"}, {"surname": "Jansen", "name": "Olav", "pid": [], "rank": 7, "affiliation": [], "fullname": "Jansen, Olav"}, {"surname": "Dressler", "name": "Dirk", "pid": [], "rank": 8, "affiliation": [], "fullname": "Dressler, Dirk"}, {"surname": "Schneider", "name": "Susanne A.", "pid": [], "rank": 9, "affiliation": [], "fullname": "Schneider, Susanne A."}, {"surname": "Klein", "name": "Christine", "pid": [], "rank": 10, "affiliation": [], "fullname": "Klein, Christine"}, {"surname": "Deuschl", "name": "Gu\\u0308nther", "pid": [], "rank": 11, "affiliation": [], "fullname": "Deuschl, G\\u00fcnther"}, {"surname": "Eimeren", "name": "Thilo", "pid": [], "rank": 12, "affiliation": [], "fullname": "van Eimeren, Thilo"}, {"surname": "Witt", "name": "Karsten", "pid": [], "rank": 13, "affiliation": [], "fullname": "Witt, Karsten"}], "source": [], "dateofcollection": "2017-07-27T19:04:09.131Z", "fulltext": [], "dateoftransformation": "2019-01-23T10:15:19.582Z", "description": [], "format": [], "journal": {"issnPrinted": "2213-1582", "conferencedate": "", "conferenceplace": "", "name": "NeuroImage: Clinical", "edition": "", "iss": "", "sp": "63", "vol": "10", "dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "issnOnline": "", "ep": "70", "issnLinking": ""}, "coverage": [], "publisher": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "Elsevier BV"}, "language": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "bestaccessright": {"classid": "OPEN", "classname": "Open Access", "schemename": "dnet:access_modes", "schemeid": "dnet:access_modes"}, "country": [{"classid": "IT", "classname": "Italy", "schemeid": "dnet:countries", "schemename": "dnet:countries"}], "extraInfo": [], "originalId": ["10.1016/j.nicl.2015.11.006"], "dateofacceptance": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "2016-01-01"}, "title": [{"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "qualifier": {"classid": "main title", "classname": "main title", "schemename": "dnet:dataCite_title", "schemeid": "dnet:dataCite_title"}, "value": "Altered brain activation in a reversal learning task unmasks adaptive changes in cognitive control in writer's cramp"}]} -{"context": [], "dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:datasetarchive", "classname": "sysimport:crosswalk:datasetarchive", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": true, "inferenceprovenance": "dedup-similarity-result-levenstein", "invisible": false, "trust": "0.9"}, "resourcetype": {"classid": "0004", "classname": "Conference object", "schemename": "dnet:dataCite_resource", "schemeid": "dnet:dataCite_resource"}, "pid": [], "contributor": [], "resulttype": {"classid": "publication", "classname": "publication", "schemename": "dnet:result_typologies", "schemeid": "dnet:result_typologies"}, "relevantdate": [], "collectedfrom": [{"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "CRIS UNS (Current Research Information System University of Novi Sad)", "key": "10|CRIS_UNS____::f66f1bd369679b5b077dcdf006089556"}], "id": "50|CrisUnsNoviS::9f9d014eea45dab432cab636c4c9cf39", "subject": [], "instance": [{"refereed": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "hostedby": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "CRIS UNS (Current Research Information System University of Novi Sad)", "key": "10|CRIS_UNS____::f66f1bd369679b5b077dcdf006089556"}, "processingchargeamount": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "license": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "url": ["https://www.cris.uns.ac.rs/record.jsf?recordId=113444&source=OpenAIRE&language=en"], "distributionlocation": "", "processingchargecurrency": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "dateofacceptance": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "2019-01-01"}, "collectedfrom": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "CRIS UNS (Current Research Information System University of Novi Sad)", "key": "10|CRIS_UNS____::f66f1bd369679b5b077dcdf006089556"}, "accessright": {"classid": "UNKNOWN", "classname": "UNKNOWN", "schemename": "dnet:access_modes", "schemeid": "dnet:access_modes"}, "instancetype": {"classid": "0004", "classname": "Conference object", "schemename": "dnet:dataCite_resource", "schemeid": "dnet:dataCite_resource"}}], "embargoenddate": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "lastupdatetimestamp": 0, "author": [{"surname": "Zeuner", "name": "Kirsten E.", "pid": [], "rank": 1, "affiliation": [], "fullname": "Zeuner, Kirsten E."}, {"surname": "Knutzen", "name": "Arne", "pid": [], "rank": 2, "affiliation": [], "fullname": "Knutzen, Arne"}, {"surname": "Granert", "name": "Oliver", "pid": [{"qualifier": {"classid": "ORCID", "classname": "ORCID"}, "value": "0000-0002-0656-1023"}, {"qualifier": {"classid": "pubmed", "classname": "pubmed"}, "value": "pubmed.it"}], "rank": 3, "affiliation": [], "fullname": "Granert, Oliver"}, {"surname": "Sablowsky", "name": "Simone", "pid": [{"qualifier": {"classid": "id", "classname": "id"}, "value": "12345678"}], "rank": 4, "affiliation": [], "fullname": "Sablowsky, Simone"}, {"surname": "Go\\u0308tz", "name": "Julia", "pid": [], "rank": 5, "affiliation": [], "fullname": "G\\u00f6tz, Julia"}, {"surname": "Wolff", "name": "Stephan", "pid": [], "rank": 6, "affiliation": [], "fullname": "Wolff, Stephan"}, {"surname": "Jansen", "name": "Olav", "pid": [{"qualifier": {"classid": "ORCID", "classname": "ORCID"}, "value": "0000-0000-0656-1023"},{"qualifier": {"classid": "id", "classname": "id"}, "value": "987654321"}], "rank": 7, "affiliation": [], "fullname": "Jansen, Olav"}, {"surname": "Dressler", "name": "Dirk", "pid": [], "rank": 8, "affiliation": [], "fullname": "Dressler, Dirk"}, {"surname": "Schneider", "name": "Susanne A.", "pid": [], "rank": 9, "affiliation": [], "fullname": "Schneider, Susanne A."}], "source": [], "dateofcollection": "2020-03-10T15:05:38.685Z", "fulltext": [], "dateoftransformation": "2020-03-11T20:11:13.15Z", "description": [], "format": [], "journal": {"issnPrinted": "", "conferencedate": "", "conferenceplace": "", "name": "", "edition": "", "iss": "", "sp": "", "vol": "", "dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "issnOnline": "", "ep": "", "issnLinking": ""}, "coverage": [], "publisher": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "language": {"classid": "en", "classname": "en", "schemename": "dnet:languages", "schemeid": "dnet:languages"}, "bestaccessright": {"classid": "UNKNOWN", "classname": "not available", "schemename": "dnet:access_modes", "schemeid": "dnet:access_modes"}, "country": [{"classid": "FI", "classname": "Finland", "schemeid": "dnet:countries", "schemename": "dnet:countries"}], "extraInfo": [], "originalId": ["(BISIS)113444", "https://www.cris.uns.ac.rs/record.jsf?recordId=113444&source=OpenAIRE&language=en"], "dateofacceptance": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "2016-01-01"}, "title": [{"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "qualifier": {"classid": "main title", "classname": "main title", "schemename": "dnet:dataCite_title", "schemeid": "dnet:dataCite_title"}, "value": "Antichains of copies of ultrahomogeneous structures"}]} \ No newline at end of file +{"context": [], "dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:datasetarchive", "classname": "sysimport:crosswalk:datasetarchive", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": true, "inferenceprovenance": "dedup-similarity-result-levenstein", "invisible": false, "trust": "0.9"}, "resourcetype": {"classid": "0004", "classname": "Conference object", "schemename": "dnet:dataCite_resource", "schemeid": "dnet:dataCite_resource"}, "pid": [], "contributor": [], "resulttype": {"classid": "publication", "classname": "publication", "schemename": "dnet:result_typologies", "schemeid": "dnet:result_typologies"}, "relevantdate": [], "collectedfrom": [{"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "CRIS UNS (Current Research Information System University of Novi Sad)", "key": "10|CRIS_UNS____::f66f1bd369679b5b077dcdf006089556"}], "id": "50|CrisUnsNoviS::9f9d014eea45dab432cab636c4c9cf39", "subject": [], "instance": [{"refereed": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "hostedby": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "CRIS UNS (Current Research Information System University of Novi Sad)", "key": "10|CRIS_UNS____::f66f1bd369679b5b077dcdf006089556"}, "processingchargeamount": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "license": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "url": ["https://www.cris.uns.ac.rs/record.jsf?recordId=113444&source=OpenAIRE&language=en"], "distributionlocation": "", "processingchargecurrency": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "dateofacceptance": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "2019-01-01"}, "collectedfrom": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "CRIS UNS (Current Research Information System University of Novi Sad)", "key": "10|CRIS_UNS____::f66f1bd369679b5b077dcdf006089556"}, "accessright": {"classid": "UNKNOWN", "classname": "UNKNOWN", "schemename": "dnet:access_modes", "schemeid": "dnet:access_modes"}, "instancetype": {"classid": "0004", "classname": "Conference object", "schemename": "dnet:dataCite_resource", "schemeid": "dnet:dataCite_resource"}}], "embargoenddate": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "lastupdatetimestamp": 0, "author": [{"surname": "Zeuner", "name": "Kirsten E.", "pid": [], "rank": 1, "affiliation": [], "fullname": "Zeuner, Kirsten E."}, {"surname": "Knutzen", "name": "Arne", "pid": [], "rank": 2, "affiliation": [], "fullname": "Knutzen, Arne"}, {"surname": "Granert", "name": "Oliver", "pid": [{"qualifier": {"classid": "ORCID", "classname": "ORCID"}, "value": "0000-0002-0656-1023"}, {"qualifier": {"classid": "pubmed", "classname": "pubmed"}, "value": "pubmed.it"}], "rank": 3, "affiliation": [], "fullname": "Granert, Oliver"}, {"surname": "Sablowsky", "name": "Simone", "pid": [{"qualifier": {"classid": "id", "classname": "id"}, "value": "12345678"}], "rank": 4, "affiliation": [], "fullname": "Sablowsky, Simone"}, {"surname": "Go\\u0308tz", "name": "Julia", "pid": [], "rank": 5, "affiliation": [], "fullname": "G\\u00f6tz, Julia"}, {"surname": "Wolff", "name": "Stephan", "pid": [], "rank": 6, "affiliation": [], "fullname": "Wolff, Stephan"}, {"surname": "Jansen", "name": "Olav", "pid": [{"qualifier": {"classid": "ORCID", "classname": "ORCID"}, "value": "0000-0000-0656-1023"},{"qualifier": {"classid": "id", "classname": "id"}, "value": "987654321"}], "rank": 7, "affiliation": [], "fullname": "Jansen, Olav"}, {"surname": "Dressler", "name": "Dirk", "pid": [], "rank": 8, "affiliation": [], "fullname": "Dressler, Dirk"}, {"surname": "Schneider", "name": "Susanne A.", "pid": [], "rank": 9, "affiliation": [], "fullname": "Schneider, Susanne A."}], "source": [], "dateofcollection": "2020-03-10T15:05:38.685Z", "fulltext": [], "dateoftransformation": "2020-03-11T20:11:13.15Z", "description": [], "format": [], "journal": {"issnPrinted": "", "conferencedate": "", "conferenceplace": "", "name": "", "edition": "", "iss": "", "sp": "", "vol": "", "dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "issnOnline": "", "ep": "", "issnLinking": ""}, "coverage": [], "publisher": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "language": {"classid": "en", "classname": "en", "schemename": "dnet:languages", "schemeid": "dnet:languages"}, "bestaccessright": {"classid": "UNKNOWN", "classname": "not available", "schemename": "dnet:access_modes", "schemeid": "dnet:access_modes"}, "country": [{"classid": "FI", "classname": "Finland", "schemeid": "dnet:countries", "schemename": "dnet:countries"}], "extraInfo": [], "originalId": ["(BISIS)113444", "https://www.cris.uns.ac.rs/record.jsf?recordId=113444&source=OpenAIRE&language=en"], "dateofacceptance": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "2016-01-01"}, "title": [{"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "qualifier": {"classid": "test title", "classname": "test title", "schemename": "dnet:dataCite_title", "schemeid": "dnet:dataCite_title"}, "value": "Antichains of copies of ultrahomogeneous structures"}]} \ No newline at end of file diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/json/publication_merge2.json b/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/json/publication_merge2.json new file mode 100644 index 000000000..a7937c287 --- /dev/null +++ b/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/json/publication_merge2.json @@ -0,0 +1,4 @@ +{"id":"50|doajarticles::842fa3b99fcdccafb4d5c8a815f56efa","dateofcollection":"2020-04-06T12:22:31.216Z","title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Increased Expression of Herpes Virus-Encoded hsv1-miR-H18 and hsv2-miR-H9-5p in Cancer-Containing Prostate Tissue Compared to That in Benign Prostate Hyperplasia Tissue"}],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Korean Continence Society"},"bestaccessright":null,"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"collectedfrom":[{"dataInfo":null,"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.5213/inj.1632552.276"}],"author":[{"affiliation":null,"fullname":"Seok Joong Yun","name":null,"pid":[],"rank":1,"surname":null},{"affiliation":null,"fullname":"Pildu Jeong","name":null,"pid":[],"rank":2,"surname":null},{"affiliation":null,"fullname":"Ho Won Kang","name":null,"pid":[],"rank":3,"surname":null},{"affiliation":null,"fullname":"Helen Ki Shinn","name":null,"pid":[],"rank":4,"surname":null},{"affiliation":null,"fullname":"Ye-Hwan Kim","name":null,"pid":[],"rank":5,"surname":null},{"affiliation":null,"fullname":"Chunri Yan","name":null,"pid":[],"rank":6,"surname":null},{"affiliation":null,"fullname":"Young-Ki Choi","name":null,"pid":[],"rank":7,"surname":null},{"affiliation":null,"fullname":"Dongho Kim","name":null,"pid":[],"rank":8,"surname":null},{"affiliation":null,"fullname":"Dong Hee Ryu","name":null,"pid":[],"rank":9,"surname":null},{"affiliation":null,"fullname":"Yun-Sok Ha","name":null,"pid":[],"rank":10,"surname":null},{"affiliation":null,"fullname":"Tae-Hwan Kim","name":null,"pid":[],"rank":11,"surname":null},{"affiliation":null,"fullname":"Tae Gyun Kwon","name":null,"pid":[],"rank":12,"surname":null},{"affiliation":null,"fullname":"Jung Min Kim","name":null,"pid":[],"rank":13,"surname":null},{"affiliation":null,"fullname":"Sang Heon Suh","name":null,"pid":[],"rank":14,"surname":null},{"affiliation":null,"fullname":"Seon-Kyu Kim","name":null,"pid":[],"rank":15,"surname":null},{"affiliation":null,"fullname":"Seon-Young Kim","name":null,"pid":[],"rank":16,"surname":null},{"affiliation":null,"fullname":"Sang Tae Kim","name":null,"pid":[],"rank":17,"surname":null},{"affiliation":null,"fullname":"Won Tae Kim","name":null,"pid":[],"rank":18,"surname":null},{"affiliation":null,"fullname":"Ok-Jun Lee","name":null,"pid":[],"rank":19,"surname":null},{"affiliation":null,"fullname":"Sung-Kwon Moon","name":null,"pid":[],"rank":20,"surname":null},{"affiliation":null,"fullname":"Nam-Hyung Kim","name":null,"pid":[],"rank":21,"surname":null},{"affiliation":null,"fullname":"Isaac Yi Kim","name":null,"pid":[],"rank":22,"surname":null},{"affiliation":null,"fullname":"Jayoung Kim","name":null,"pid":[],"rank":23,"surname":null},{"affiliation":null,"fullname":"Hee-Jae Cha","name":null,"pid":[],"rank":24,"surname":null},{"affiliation":null,"fullname":"Yung-Hyun Choi","name":null,"pid":[],"rank":25,"surname":null},{"affiliation":null,"fullname":"Eun-Jong Cha","name":null,"pid":[],"rank":26,"surname":null},{"affiliation":null,"fullname":"Wun-Jae Kim","name":null,"pid":[],"rank":27,"surname":null}],"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"country":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"MicroRNAs"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Prostate Neoplasms"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Herpesviridae"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Prostate Hyperplasia"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Diseases of the genitourinary system. Urology"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"RC870-923"}],"description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Purpose: Previously, we reported the presence of virus-encoded microRNAs (miRNAs) in the urine of prostate cancer (CaP) patients. In this study, we investigated the expression of two herpes virus-encoded miRNAs in prostate tissue. Methods: A total of 175 tissue samples from noncancerous benign prostatic hyperplasia (BPH), 248 tissue samples from patients with CaP and BPH, and 50 samples from noncancerous surrounding tissues from these same patients were analyzed for the expression of two herpes virus-encoded miRNAs by real-time polymerase chain reaction (PCR) and immunocytochemistry using nanoparticles as molecular beacons. Results: Real-time reverse transcription-PCR results revealed significantly higher expression of hsv1-miR-H18 and hsv2-miRH9- 5p in surrounding noncancerous and CaP tissues than that in BPH tissue (each comparison, P<0.001). Of note, these miRNA were expressed equivalently in the CaP tissues and surrounding noncancerous tissues. Moreover, immunocytochemistry clearly demonstrated a significant enrichment of both hsv1-miR-H18 and hsv2-miR-H9 beacon-labeled cells in CaP and surrounding noncancerous tissue compared to that in BPH tissue (each comparison, P<0.05 for hsv1-miR-H18 and hsv2- miR-H9). Conclusions: These results suggest that increased expression of hsv1-miR-H18 and hsv2-miR-H95p might be associated with tumorigenesis in the prostate. Further studies will be required to elucidate the role of these miRNAs with respect to CaP and herpes viral infections."}],"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2016-06-01"},"embargoenddate":null,"resourcetype":null,"context":[],"instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2016-06-01"},"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|doajarticles::52db9a4f8e176f6e8e1d9f0c1e0a2de0","value":"International Neurourology Journal"},"instancetype":{"classid":"0001","classname":"peerReviewed","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":null,"processingchargeamount":null,"processingchargecurrency":null,"refereed":null,"url":["http://www.einj.org/upload/pdf/inj-1632552-276.pdf","https://doaj.org/toc/2093-4777","https://doaj.org/toc/2093-6931"]}]} +{"id":"50|od_______267::b5f5da11a8239ef57655cea8675cb466","dateofcollection":"","title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Increased Expression of Herpes Virus-Encoded hsv1-miR-H18 and hsv2-miR-H9-5p in Cancer-Containing Prostate Tissue Compared to That in Benign Prostate Hyperplasia Tissue"}],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Korean Continence Society"},"bestaccessright":null,"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"collectedfrom":[{"dataInfo":null,"key":"10|opendoar____::eda80a3d5b344bc40f3bc04f65b7a357","value":"PubMed Central"}],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.5213/inj.1632552.276"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"pmc","classname":"pmc","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"PMC4932644"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"pmid","classname":"pmid","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"27377944"}],"author":[{"affiliation":null,"fullname":"Yun, Seok Joong","name":"Seok Joong","pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"0000-0001-7737-4746"}],"rank":1,"surname":"Yun"},{"affiliation":null,"fullname":"Jeong, Pildu","name":"Pildu","pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"0000-0002-5602-5376"}],"rank":2,"surname":"Jeong"},{"affiliation":null,"fullname":"Kang, Ho Won","name":"Ho Won","pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"0000-0002-8164-4427"}],"rank":3,"surname":"Kang"},{"affiliation":null,"fullname":"Shinn, Helen Ki","name":"Helen Ki","pid":[],"rank":4,"surname":"Shinn"},{"affiliation":null,"fullname":"Kim, Ye-Hwan","name":"Ye-Hwan","pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"0000-0002-8676-7119"}],"rank":5,"surname":"Kim"},{"affiliation":null,"fullname":"Yan, Chunri","name":"Chunri","pid":[],"rank":6,"surname":"Yan"},{"affiliation":null,"fullname":"Choi, Young-Ki","name":"Young-Ki","pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"0000-0002-1894-9869"}],"rank":7,"surname":"Choi"},{"affiliation":null,"fullname":"Kim, Dongho","name":"Dongho","pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"0000-0002-1409-3311"}],"rank":8,"surname":"Kim"},{"affiliation":null,"fullname":"Ryu, Dong Hee","name":"Dong Hee","pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"0000-0001-6088-298X"}],"rank":9,"surname":"Ryu"},{"affiliation":null,"fullname":"Ha, Yun-Sok","name":"Yun-Sok","pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"0000-0003-3732-9814"}],"rank":10,"surname":"Ha"},{"affiliation":null,"fullname":"Kim, Tae-Hwan","name":"Tae-Hwan","pid":[],"rank":11,"surname":"Kim"},{"affiliation":null,"fullname":"Kwon, Tae Gyun","name":"Tae Gyun","pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"0000-0002-4390-0952"}],"rank":12,"surname":"Kwon"},{"affiliation":null,"fullname":"Kim, Jung Min","name":"Jung Min","pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"0000-0001-6319-0217"}],"rank":13,"surname":"Kim"},{"affiliation":null,"fullname":"Suh, Sang Heon","name":"Sang Heon","pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"0000-0003-4560-8880"}],"rank":14,"surname":"Suh"},{"affiliation":null,"fullname":"Kim, Seon-Kyu","name":"Seon-Kyu","pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"0000-0002-4176-5187"}],"rank":15,"surname":"Kim"},{"affiliation":null,"fullname":"Kim, Seon-Young","name":"Seon-Young","pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"0000-0002-1030-7730"}],"rank":16,"surname":"Kim"},{"affiliation":null,"fullname":"Kim, Sang Tae","name":"Sang Tae","pid":[],"rank":17,"surname":"Kim"},{"affiliation":null,"fullname":"Kim, Won Tae","name":"Won Tae","pid":[],"rank":18,"surname":"Kim"},{"affiliation":null,"fullname":"Lee, Ok-Jun","name":"Ok-Jun","pid":[],"rank":19,"surname":"Lee"},{"affiliation":null,"fullname":"Moon, Sung-Kwon","name":"Sung-Kwon","pid":[],"rank":20,"surname":"Moon"},{"affiliation":null,"fullname":"Kim, Nam-Hyung","name":"Nam-Hyung","pid":[],"rank":21,"surname":"Kim"},{"affiliation":null,"fullname":"Kim, Isaac Yi","name":"Isaac Yi","pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"0000-0002-1967-5281"}],"rank":22,"surname":"Kim"},{"affiliation":null,"fullname":"Kim, Jayoung","name":"Jayoung","pid":[],"rank":23,"surname":"Kim"},{"affiliation":null,"fullname":"Cha, Hee-Jae","name":"Hee-Jae","pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"0000-0002-6963-2685"}],"rank":24,"surname":"Cha"},{"affiliation":null,"fullname":"Choi, Yung-Hyun","name":"Yung-Hyun","pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"0000-0002-1454-3124"}],"rank":25,"surname":"Choi"},{"affiliation":null,"fullname":"Cha, Eun-Jong","name":"Eun-Jong","pid":[],"rank":26,"surname":"Cha"},{"affiliation":null,"fullname":"Kim, Wun-Jae","name":"Wun-Jae","pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"0000-0002-8060-8926"}],"rank":27,"surname":"Kim"}],"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"country":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Original Article"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Fundamental Science for Neurourology"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"MicroRNAs"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Prostate Neoplasms"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Herpesviridae"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Prostate Hyperplasia"}],"description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Purpose: Previously, we reported the presence of virus-encoded microRNAs (miRNAs) in the urine of prostate cancer (CaP) patients. In this study, we investigated the expression of two herpes virus-encoded miRNAs in prostate tissue. Methods: A total of 175 tissue samples from noncancerous benign prostatic hyperplasia (BPH), 248 tissue samples from patients with CaP and BPH, and 50 samples from noncancerous surrounding tissues from these same patients were analyzed for the expression of two herpes virus-encoded miRNAs by real-time polymerase chain reaction (PCR) and immunocytochemistry using nanoparticles as molecular beacons. Results: Real-time reverse transcription-PCR results revealed significantly higher expression of hsv1-miR-H18 and hsv2-miRH9- 5p in surrounding noncancerous and CaP tissues than that in BPH tissue (each comparison, P<0.001). Of note, these miRNA were expressed equivalently in the CaP tissues and surrounding noncancerous tissues. Moreover, immunocytochemistry clearly demonstrated a significant enrichment of both hsv1-miR-H18 and hsv2-miR-H9 beacon-labeled cells in CaP and surrounding noncancerous tissue compared to that in BPH tissue (each comparison, P<0.05 for hsv1-miR-H18 and hsv2- miR-H9). Conclusions: These results suggest that increased expression of hsv1-miR-H18 and hsv2-miR-H95p might be associated with tumorigenesis in the prostate. Further studies will be required to elucidate the role of these miRNAs with respect to CaP and herpes viral infections."}],"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2016-06-01"},"embargoenddate":null,"resourcetype":null,"context":[],"instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|opendoar____::eda80a3d5b344bc40f3bc04f65b7a357","value":"PubMed Central"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2016-06-01"},"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|opendoar____::8b6dd7db9af49e67306feb59a8bdc52c","value":"Europe PubMed Central"},"instancetype":{"classid":"0001","classname":"peerReviewed","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":null,"processingchargeamount":null,"processingchargecurrency":null,"refereed":null,"url":["http://europepmc.org/articles/PMC4932644"]}]} +{"id":"50|doiboost____::0ca46ff10b2b4c756191719d85302b14","dateofcollection":"2019-02-15","title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Increased Expression of Herpes Virus-Encoded hsv1-miR-H18 and hsv2-miR-H9-5p in Cancer-Containing Prostate Tissue Compared to That in Benign Prostate Hyperplasia Tissue"}],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"collectedfrom":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"key":"10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a","value":"Microsoft Academic Graph"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"key":"10|openaire____::8ac8380272269217cb09a928c8caa993","value":"UnpayWall"}],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.5213/inj.1632552.276"}],"author":[{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"Chungbuk National University"}],"fullname":"Seok Joong Yun","name":"Seok Joong","pid":[{"dataInfo":null,"qualifier":{"classid":"MAG Identifier","classname":"MAG Identifier","schemeid":null,"schemename":null},"value":"2105974574"}],"rank":1,"surname":"Yun"},{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"Chungbuk National University"}],"fullname":"Pildu Jeong","name":"Pildu","pid":[{"dataInfo":null,"qualifier":{"classid":"MAG Identifier","classname":"MAG Identifier","schemeid":null,"schemename":null},"value":"2041919263"}],"rank":2,"surname":"Jeong"},{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"Chungbuk National University"}],"fullname":"Ho Won Kang","name":"Ho Won","pid":[{"dataInfo":null,"qualifier":{"classid":"MAG Identifier","classname":"MAG Identifier","schemeid":null,"schemename":null},"value":"2164408067"}],"rank":3,"surname":"Kang"},{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"Inha University"}],"fullname":"Helen Ki Shinn","name":"Helen Ki","pid":[{"dataInfo":null,"qualifier":{"classid":"MAG Identifier","classname":"MAG Identifier","schemeid":null,"schemename":null},"value":"2045077081"}],"rank":4,"surname":"Shinn"},{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"Chungbuk National University"}],"fullname":"Ye-Hwan Kim","name":"Ye-Hwan","pid":[{"dataInfo":null,"qualifier":{"classid":"MAG Identifier","classname":"MAG Identifier","schemeid":null,"schemename":null},"value":"2276303457"}],"rank":5,"surname":"Kim"},{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"Chungbuk National University"}],"fullname":"Chunri Yan","name":"Chunri","pid":[{"dataInfo":null,"qualifier":{"classid":"MAG Identifier","classname":"MAG Identifier","schemeid":null,"schemename":null},"value":"2186750404"}],"rank":6,"surname":"Yan"},{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"Chungbuk National University"}],"fullname":"Young-Ki Choi","name":"Young-Ki","pid":[{"dataInfo":null,"qualifier":{"classid":"MAG Identifier","classname":"MAG Identifier","schemeid":null,"schemename":null},"value":"2311466124"}],"rank":7,"surname":"Choi"},{"affiliation":[],"fullname":"Dongho Kim","name":"Dongho","pid":[{"dataInfo":null,"qualifier":{"classid":"MAG Identifier","classname":"MAG Identifier","schemeid":null,"schemename":null},"value":"2644843893"}],"rank":8,"surname":"Kim"},{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"Chungbuk National University"}],"fullname":"Dong Hee Ryu","name":"Dong Hee","pid":[{"dataInfo":null,"qualifier":{"classid":"MAG Identifier","classname":"MAG Identifier","schemeid":null,"schemename":null},"value":"2117604941"}],"rank":9,"surname":"Ryu"},{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"Kyungpook National University"}],"fullname":"Yun-Sok Ha","name":"Yun-Sok","pid":[{"dataInfo":null,"qualifier":{"classid":"MAG Identifier","classname":"MAG Identifier","schemeid":null,"schemename":null},"value":"2145233282"}],"rank":10,"surname":"Ha"},{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"Kyungpook National University"}],"fullname":"Tae-Hwan Kim","name":"Tae-Hwan","pid":[{"dataInfo":null,"qualifier":{"classid":"MAG Identifier","classname":"MAG Identifier","schemeid":null,"schemename":null},"value":"2509096378"}],"rank":11,"surname":"Kim"},{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"Kyungpook National University"}],"fullname":"Tae Gyun Kwon","name":"Tae Gyun","pid":[{"dataInfo":null,"qualifier":{"classid":"MAG Identifier","classname":"MAG Identifier","schemeid":null,"schemename":null},"value":"1978978081"}],"rank":12,"surname":"Kwon"},{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"Daejeon University"}],"fullname":"Jung Min Kim","name":"Jung Min","pid":[{"dataInfo":null,"qualifier":{"classid":"MAG Identifier","classname":"MAG Identifier","schemeid":null,"schemename":null},"value":"2265841962"}],"rank":13,"surname":"Kim"},{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"KAIST"}],"fullname":"Sang Heon Suh","name":"Sang Heon","pid":[{"dataInfo":null,"qualifier":{"classid":"MAG Identifier","classname":"MAG Identifier","schemeid":null,"schemename":null},"value":"2890693470"}],"rank":14,"surname":"Suh"},{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"Korea Research Institute of Bioscience and Biotechnology"}],"fullname":"Seon-Kyu Kim","name":"Seon-Kyu","pid":[{"dataInfo":null,"qualifier":{"classid":"MAG Identifier","classname":"MAG Identifier","schemeid":null,"schemename":null},"value":"2162364977"}],"rank":15,"surname":"Kim"},{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"Korea Research Institute of Bioscience and Biotechnology"}],"fullname":"Seon-Young Kim","name":"Seon-Young","pid":[{"dataInfo":null,"qualifier":{"classid":"MAG Identifier","classname":"MAG Identifier","schemeid":null,"schemename":null},"value":"2344797375"}],"rank":16,"surname":"Kim"},{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"Seoul National University Bundang Hospital"}],"fullname":"Sang Tae Kim","name":"Sang Tae","pid":[{"dataInfo":null,"qualifier":{"classid":"MAG Identifier","classname":"MAG Identifier","schemeid":null,"schemename":null},"value":"2257827509"}],"rank":17,"surname":"Kim"},{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"Chungbuk National University"}],"fullname":"Won Tae Kim","name":"Won Tae","pid":[{"dataInfo":null,"qualifier":{"classid":"MAG Identifier","classname":"MAG Identifier","schemeid":null,"schemename":null},"value":"2617237649"}],"rank":18,"surname":"Kim"},{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"Chungbuk National University"}],"fullname":"Ok-Jun Lee","name":"Ok-Jun","pid":[{"dataInfo":null,"qualifier":{"classid":"MAG Identifier","classname":"MAG Identifier","schemeid":null,"schemename":null},"value":"2112231548"}],"rank":19,"surname":"Lee"},{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"Chung-Ang University"}],"fullname":"Sung-Kwon Moon","name":"Sung-Kwon","pid":[{"dataInfo":null,"qualifier":{"classid":"MAG Identifier","classname":"MAG Identifier","schemeid":null,"schemename":null},"value":"2796689429"}],"rank":20,"surname":"Moon"},{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"Chungbuk National University"}],"fullname":"Nam-Hyung Kim","name":"Nam-Hyung","pid":[{"dataInfo":null,"qualifier":{"classid":"MAG Identifier","classname":"MAG Identifier","schemeid":null,"schemename":null},"value":"2136287741"}],"rank":21,"surname":"Kim"},{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"Rutgers University"}],"fullname":"Isaac Yi Kim","name":"Isaac Yi","pid":[{"dataInfo":null,"qualifier":{"classid":"MAG Identifier","classname":"MAG Identifier","schemeid":null,"schemename":null},"value":"2015295992"}],"rank":22,"surname":"Kim"},{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"Harvard University"}],"fullname":"Jayoung Kim","name":"Jayoung","pid":[{"dataInfo":null,"qualifier":{"classid":"MAG Identifier","classname":"MAG Identifier","schemeid":null,"schemename":null},"value":"2130848131"}],"rank":23,"surname":"Kim"},{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"Kosin University"}],"fullname":"Hee-Jae Cha","name":"Hee-Jae","pid":[{"dataInfo":null,"qualifier":{"classid":"MAG Identifier","classname":"MAG Identifier","schemeid":null,"schemename":null},"value":"2113489867"}],"rank":24,"surname":"Cha"},{"affiliation":[],"fullname":"Yung-Hyun Choi","name":"Yung-Hyun","pid":[{"dataInfo":null,"qualifier":{"classid":"MAG Identifier","classname":"MAG Identifier","schemeid":null,"schemename":null},"value":"2151282194"}],"rank":25,"surname":"Choi"},{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"Chungbuk National University"}],"fullname":"Eun-Jong Cha","name":"Eun-Jong","pid":[{"dataInfo":null,"qualifier":{"classid":"MAG Identifier","classname":"MAG Identifier","schemeid":null,"schemename":null},"value":"2109572239"}],"rank":26,"surname":"Cha"},{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"Chungbuk National University"}],"fullname":"Wun-Jae Kim","name":"Wun-Jae","pid":[{"dataInfo":null,"qualifier":{"classid":"MAG Identifier","classname":"MAG Identifier","schemeid":null,"schemename":null},"value":"2113339670"}],"rank":27,"surname":"Kim"}],"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":{"classid":"und","classname":"Undetermined","schemeid":"dnet:languages","schemename":"dnet:languages"},"country":[],"subject":[],"description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"Purpose:"}],"dateofacceptance":null,"embargoenddate":null,"resourcetype":null,"context":null,"instance":null} +{"id":"Previously, we reported the presence of virus-encoded microRNAs (miRNAs) in the urine of prostate cancer (CaP) patients. In this study, we investigated the expression of two herpes virus-encoded miRNAs in prostate tissue.","dateofcollection":"false\u0004\u0004false\u0004false\u0004\u0005\u0005\u0005\u0004\u00032016-6-30","title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":null,"inferred":null,"invisible":null,"provenanceaction":null,"trust":null},"qualifier":{"classid":"","classname":null,"schemeid":null,"schemename":null},"value":"false"},{"dataInfo":{"deletedbyinference":null,"inferenceprovenance":null,"inferred":null,"invisible":null,"provenanceaction":null,"trust":null},"qualifier":null,"value":null}],"publisher":{"dataInfo":{"deletedbyinference":null,"inferenceprovenance":null,"inferred":null,"invisible":null,"provenanceaction":null,"trust":null},"value":""},"bestaccessright":{"classid":"","classname":null,"schemeid":null,"schemename":null},"dataInfo":{"deletedbyinference":null,"inferenceprovenance":"UNKNOWN\u0005not available\u0005dnet:access_modes\u0005dnet:access_modes\u0004false\u0006\u0006false\u0006false\u0006\u0007\u0007\u0007\u0006\u000510|openaire____::081b82f96300b6a6e3d282bad31cb6e2\u0005Crossref\u0004false\u0006\u0006false\u0006false\u0006\u0007\u0007\u0007\u0006\u0005\u0004\u0004false\u0006\u0006false\u0006false\u0006\u0007\u0007\u0007\u0006\u000510|openaire____::55045bd2a65019fd8e6741a755395c8c\u0005Unknown Repository\u00040001\u0005Article\u0005dnet:publication_resource\u0005dnet:publication_resource\u0004false\u0006\u0006false\u0006false\u0006\u0007\u0007\u0007\u0006\u0005\u0004false\u0006\u0006false\u0006false\u0006\u0007\u0007\u0007\u0006\u0005\u0004false\u0006\u0006false\u0006false\u0006\u0007\u0007\u0007\u0006\u0005\u0004false\u0006\u0006false\u0006false\u0006\u0007\u0007\u0007\u0006\u0005\u0004http://einj.org/upload/pdf/inj-1632552-276.pdf","inferred":null,"invisible":null,"provenanceaction":{"classid":"UNKNOWN\u0005not available\u0005dnet:access_modes\u0005dnet:access_modes","classname":"false\u0006\u0006false\u0006false\u0006\u0007\u0007\u0007\u0006\u000510|openaire____::5f532a3fc4f1ea403f37070f59a7a53a\u0005Microsoft Academic Graph","schemeid":"false\u0006\u0006false\u0006false\u0006\u0007\u0007\u0007\u0006\u0005","schemename":""},"trust":"RESTRICTED\u0005Restricted\u0005dnet:access_modes\u0005dnet:access_modes\u0004false\u0006\u0006false\u0006false\u0006\u0007\u0007\u0007\u0006\u000510|openaire____::081b82f96300b6a6e3d282bad31cb6e2\u0005Crossref\u0004false\u0006\u0006false\u0006false\u0006\u0007\u0007\u0007\u0006\u0005\u0004\u0004false\u0006\u0006false\u0006false\u0006\u0007\u0007\u0007\u0006\u000510|doajarticles::52db9a4f8e176f6e8e1d9f0c1e0a2de0\u0005International Neurourology Journal\u00040001\u0005Article\u0005dnet:publication_resource\u0005dnet:publication_resource\u0004false\u0006\u0006false\u0006false\u0006\u0007\u0007\u0007\u0006\u0005\u0004false\u0006\u0006false\u0006false\u0006\u0007\u0007\u0007\u0006\u0005\u0004false\u0006\u0006false\u0006false\u0006\u0007\u0007\u0007\u0006\u0005\u0004false\u0006\u0006false\u0006false\u0006\u0007\u0007\u0007\u0006\u0005\u0004http://dx.doi.org/10.5213/inj.1632552.276"},"collectedfrom":null,"pid":null,"author":null,"resulttype":null,"language":null,"country":null,"subject":null,"description":null,"dateofacceptance":null,"embargoenddate":null,"resourcetype":null,"context":null,"instance":null} \ No newline at end of file diff --git a/dhp-workflows/dhp-dedup-scholexplorer/pom.xml b/dhp-workflows/dhp-dedup-scholexplorer/pom.xml index dff376c2d..429c8a648 100644 --- a/dhp-workflows/dhp-dedup-scholexplorer/pom.xml +++ b/dhp-workflows/dhp-dedup-scholexplorer/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.1-SNAPSHOT + 1.2.2-SNAPSHOT 4.0.0 diff --git a/dhp-workflows/dhp-distcp/pom.xml b/dhp-workflows/dhp-distcp/pom.xml index c13bec8e6..8454c29a4 100644 --- a/dhp-workflows/dhp-distcp/pom.xml +++ b/dhp-workflows/dhp-distcp/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.1-SNAPSHOT + 1.2.2-SNAPSHOT 4.0.0 diff --git a/dhp-workflows/dhp-enrichment/pom.xml b/dhp-workflows/dhp-enrichment/pom.xml index fe9833e3e..2dc0f2436 100644 --- a/dhp-workflows/dhp-enrichment/pom.xml +++ b/dhp-workflows/dhp-enrichment/pom.xml @@ -1,11 +1,9 @@ - + dhp-workflows eu.dnetlib.dhp - 1.2.1-SNAPSHOT + 1.2.2-SNAPSHOT 4.0.0 diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java index 1c65e8ade..4800def0a 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java @@ -4,6 +4,7 @@ package eu.dnetlib.dhp.bulktag; import static eu.dnetlib.dhp.PropagationConstant.removeOutputDir; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; +import java.util.ArrayList; import java.util.Optional; import org.apache.commons.io.IOUtils; @@ -100,6 +101,7 @@ public class SparkBulkTagJob { ResultTagger resultTagger = new ResultTagger(); readPath(spark, inputPath, resultClazz) + .map(patchResult(), Encoders.bean(resultClazz)) .map( (MapFunction) value -> resultTagger .enrichContextCriteria( @@ -119,4 +121,17 @@ public class SparkBulkTagJob { .map((MapFunction) value -> OBJECT_MAPPER.readValue(value, clazz), Encoders.bean(clazz)); } + // TODO remove this hack as soon as the values fixed by this method will be provided as NON null + private static MapFunction patchResult() { + return (MapFunction) r -> { + if (r.getDataInfo().getDeletedbyinference() == null) { + r.getDataInfo().setDeletedbyinference(false); + } + if (r.getContext() == null) { + r.setContext(new ArrayList<>()); + } + return r; + }; + } + } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfiguration.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfiguration.java index 29ddde15f..844fe2962 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfiguration.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfiguration.java @@ -131,7 +131,7 @@ public class CommunityConfiguration implements Serializable { p -> { if (p.getSnd() == null) return p.getFst(); - if (((SelectionConstraints) p.getSnd()).verifyCriteria(param)) + if (p.getSnd().verifyCriteria(param)) return p.getFst(); else return null; diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/VerbResolver.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/VerbResolver.java index 3d0db2063..f54a1ceba 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/VerbResolver.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/VerbResolver.java @@ -34,7 +34,7 @@ public class VerbResolver implements Serializable { .collect( Collectors .toMap( - value -> (String) ((ClassInfo) value) + value -> (String) value .getAnnotationInfo() .get(0) .getParameterValues() diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java index 98b573102..f28c5aa06 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java @@ -77,9 +77,15 @@ public class PrepareDatasourceCountryAssociation { List allowedtypes, String inputPath, String outputPath) { - String whitelisted = ""; - for (String i : whitelist) { - whitelisted += " OR id = '" + i + "'"; + String whitelisted = " d.id = '" + whitelist.get(0) + "'"; + for (int i = 1; i < whitelist.size(); i++) { + whitelisted += " OR d.id = '" + whitelist.get(i) + "'"; + } + + String allowed = "d.datasourcetype.classid = '" + allowedtypes.get(0) + "'"; + + for (int i = 1; i < allowedtypes.size(); i++) { + allowed += " OR d.datasourcetype.classid = '" + allowedtypes.get(i) + "'"; } Dataset datasource = readPath(spark, inputPath + "/datasource", Datasource.class); @@ -90,26 +96,39 @@ public class PrepareDatasourceCountryAssociation { relation.createOrReplaceTempView("relation"); organization.createOrReplaceTempView("organization"); - String query = "SELECT source dataSourceId, named_struct('classid', country.classid, 'classname', country.classname) country " - + "FROM ( SELECT id " - + " FROM datasource " - + " WHERE (datainfo.deletedbyinference = false " - + whitelisted - + ") " - + getConstraintList("datasourcetype.classid = '", allowedtypes) - + ") d " - + "JOIN ( SELECT source, target " - + " FROM relation " - + " WHERE relclass = '" - + ModelConstants.IS_PROVIDED_BY - + "' " - + " AND datainfo.deletedbyinference = false ) rel " - + "ON d.id = rel.source " - + "JOIN (SELECT id, country " - + " FROM organization " - + " WHERE datainfo.deletedbyinference = false " - + " AND length(country.classid) > 0) o " - + "ON o.id = rel.target"; +// String query = "SELECT source dataSourceId, named_struct('classid', country.classid, 'classname', country.classname) country " +// + "FROM ( SELECT id " +// + " FROM datasource " +// + " WHERE (datainfo.deletedbyinference = false " +// + whitelisted +// + ") " +// + getConstraintList("datasourcetype.classid = '", allowedtypes) +// + ") d " +// + "JOIN ( SELECT source, target " +// + " FROM relation " +// + " WHERE relclass = '" +// + ModelConstants.IS_PROVIDED_BY +// + "' " +// + " AND datainfo.deletedbyinference = false ) rel " +// + "ON d.id = rel.source " +// + "JOIN (SELECT id, country " +// + " FROM organization " +// + " WHERE datainfo.deletedbyinference = false " +// + " AND length(country.classid) > 0) o " +// + "ON o.id = rel.target"; + + String query = "SELECT source dataSourceId, " + + "named_struct('classid', country.classid, 'classname', country.classname) country " + + "FROM datasource d " + + "JOIN relation rel " + + "ON d.id = rel.source " + + "JOIN organization o " + + "ON o.id = rel.target " + + "WHERE rel.datainfo.deletedbyinference = false " + + "and rel.relclass = '" + ModelConstants.IS_PROVIDED_BY + "'" + + "and o.datainfo.deletedbyinference = false " + + "and length(o.country.classid) > 0 " + + "and (" + allowed + " or " + whitelisted + ")"; spark .sql(query) diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareResultCountrySet.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareResultCountrySet.java index 34b376413..8d0d6c48b 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareResultCountrySet.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareResultCountrySet.java @@ -4,7 +4,12 @@ package eu.dnetlib.dhp.countrypropagation; import static eu.dnetlib.dhp.PropagationConstant.*; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; +import java.util.ArrayList; +import java.util.Set; +import java.util.stream.Collectors; + import org.apache.commons.io.IOUtils; +import org.apache.hadoop.io.compress.GzipCodec; import org.apache.spark.SparkConf; import org.apache.spark.sql.*; import org.apache.spark.sql.Dataset; @@ -13,6 +18,7 @@ import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.schema.oaf.*; +import scala.Tuple2; public class PrepareResultCountrySet { private static final Logger log = LoggerFactory.getLogger(PrepareResultCountrySet.class); @@ -60,6 +66,7 @@ public class PrepareResultCountrySet { conf, isSparkSessionManaged, spark -> { + removeOutputDir(spark, outputPath); getPotentialResultToUpdate( spark, inputPath, @@ -89,10 +96,33 @@ public class PrepareResultCountrySet { spark .sql(RESULT_COUNTRYSET_QUERY) .as(Encoders.bean(ResultCountrySet.class)) - .write() - .option("compression", "gzip") - .mode(SaveMode.Append) - .json(outputPath); + .toJavaRDD() + .mapToPair(value -> new Tuple2<>(value.getResultId(), value)) + .reduceByKey((a, b) -> { + ArrayList countryList = a.getCountrySet(); + Set countryCodes = countryList + .stream() + .map(country -> country.getClassid()) + .collect(Collectors.toSet()); + b + .getCountrySet() + .stream() + .forEach(c -> { + if (!countryCodes.contains(c.getClassid())) { + countryList.add(c); + countryCodes.add(c.getClassid()); + } + + }); + a.setCountrySet(countryList); + return a; + }) + .map(couple -> OBJECT_MAPPER.writeValueAsString(couple._2())) + .saveAsTextFile(outputPath, GzipCodec.class); +// .write() +// .option("compression", "gzip") +// .mode(SaveMode.Append) +// .json(outputPath); } } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java index bea847ca7..3fc127064 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java @@ -22,6 +22,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.Lists; import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.common.PacePerson; import eu.dnetlib.dhp.schema.oaf.Author; import eu.dnetlib.dhp.schema.oaf.Result; import eu.dnetlib.dhp.schema.oaf.StructuredProperty; @@ -121,30 +122,39 @@ public class SparkOrcidToResultFromSemRelJob { } private static void enrichAuthor(Author a, List au) { + PacePerson pp = new PacePerson(a.getFullname(), false); for (AutoritativeAuthor aa : au) { - if (enrichAuthor(aa, a)) { + if (enrichAuthor(aa, a, pp.getNormalisedFirstName(), pp.getNormalisedSurname())) { return; } } } - private static boolean enrichAuthor(AutoritativeAuthor autoritative_author, Author author) { + private static boolean enrichAuthor(AutoritativeAuthor autoritative_author, Author author, + String author_name, + String author_surname) { boolean toaddpid = false; if (StringUtils.isNotEmpty(autoritative_author.getSurname())) { if (StringUtils.isNotEmpty(author.getSurname())) { + author_surname = author.getSurname(); + } + if (StringUtils.isNotEmpty(author_surname)) { if (autoritative_author .getSurname() .trim() - .equalsIgnoreCase(author.getSurname().trim())) { + .equalsIgnoreCase(author_surname.trim())) { // have the same surname. Check the name if (StringUtils.isNotEmpty(autoritative_author.getName())) { if (StringUtils.isNotEmpty(author.getName())) { + author_name = author.getName(); + } + if (StringUtils.isNotEmpty(author_name)) { if (autoritative_author .getName() .trim() - .equalsIgnoreCase(author.getName().trim())) { + .equalsIgnoreCase(author_name.trim())) { toaddpid = true; } // they could be differently written (i.e. only the initials of the name @@ -154,7 +164,7 @@ public class SparkOrcidToResultFromSemRelJob { .getName() .trim() .substring(0, 0) - .equalsIgnoreCase(author.getName().trim().substring(0, 0))) { + .equalsIgnoreCase(author_name.trim().substring(0, 0))) { toaddpid = true; } } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/SparkResultToProjectThroughSemRelJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/SparkResultToProjectThroughSemRelJob.java index 1f6264c18..0791fd68c 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/SparkResultToProjectThroughSemRelJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/SparkResultToProjectThroughSemRelJob.java @@ -105,11 +105,7 @@ public class SparkResultToProjectThroughSemRelJob { .stream() .forEach( (p -> { - if (potential_update - .getProjectSet() - .contains(p)) { - potential_update.getProjectSet().remove(p); - } + potential_update.getProjectSet().remove(p); })); } String resId = potential_update.getResultId(); diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/PrepareResultCommunitySet.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/PrepareResultCommunitySet.java index 5574aad75..750d333e5 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/PrepareResultCommunitySet.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/PrepareResultCommunitySet.java @@ -7,6 +7,7 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; import java.util.*; import org.apache.commons.io.IOUtils; +import org.apache.hadoop.io.compress.GzipCodec; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.sql.*; @@ -19,6 +20,7 @@ import com.google.gson.Gson; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.Relation; +import scala.Tuple2; public class PrepareResultCommunitySet { @@ -93,10 +95,24 @@ public class PrepareResultCommunitySet { result_organizationset .map(mapResultCommunityFn(organizationMap), Encoders.bean(ResultCommunityList.class)) .filter(Objects::nonNull) - .write() - .mode(SaveMode.Overwrite) - .option("compression", "gzip") - .json(outputPath); + .toJavaRDD() + .mapToPair(value -> new Tuple2<>(value.getResultId(), value)) + .reduceByKey((a, b) -> { + ArrayList cl = a.getCommunityList(); + b.getCommunityList().stream().forEach(s -> { + if (!cl.contains(s)) { + cl.add(s); + } + }); + a.setCommunityList(cl); + return a; + }) + .map(value -> OBJECT_MAPPER.writeValueAsString(value._2())) + .saveAsTextFile(outputPath, GzipCodec.class); +// .write() +// .mode(SaveMode.Overwrite) +// .option("compression", "gzip") +// .json(outputPath); } private static MapFunction mapResultCommunityFn( diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/SparkResultToOrganizationFromIstRepoJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/SparkResultToOrganizationFromIstRepoJob.java index 0c5e1d8be..ff34bd42a 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/SparkResultToOrganizationFromIstRepoJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/SparkResultToOrganizationFromIstRepoJob.java @@ -136,9 +136,7 @@ public class SparkResultToOrganizationFromIstRepoJob { .stream() .forEach( rId -> { - if (organization_list.contains(rId)) { - organization_list.remove(rId); - } + organization_list.remove(rId); }); } String resultId = potential_update.getResultId(); diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/ResultToOrganizationJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/ResultToOrganizationJobTest.java index 435b76605..cfcccc5f0 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/ResultToOrganizationJobTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/ResultToOrganizationJobTest.java @@ -99,6 +99,7 @@ public class ResultToOrganizationJobTest { .map(item -> OBJECT_MAPPER.readValue(item, Relation.class)); Assertions.assertEquals(0, tmp.count()); + FileUtils.deleteDirectory(workingDir.toFile()); } /** @@ -171,6 +172,7 @@ public class ResultToOrganizationJobTest { + "(target = '20|opendoar____::124266ebc4ece2934eb80edfda3f2091' " + "or target = '20|dedup_wf_001::5168917a6aeeea55269daeac1af2ecd2')") .count()); + FileUtils.deleteDirectory(workingDir.toFile()); } @Test @@ -266,5 +268,6 @@ public class ResultToOrganizationJobTest { "relclass = 'isAuthorInstitutionOf' and " + "substring(source, 1,2) = '20' and substring(target, 1, 2) = '50'") .count()); + FileUtils.deleteDirectory(workingDir.toFile()); } } diff --git a/dhp-workflows/dhp-graph-mapper/pom.xml b/dhp-workflows/dhp-graph-mapper/pom.xml index 62968c410..aee3d27c1 100644 --- a/dhp-workflows/dhp-graph-mapper/pom.xml +++ b/dhp-workflows/dhp-graph-mapper/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.1-SNAPSHOT + 1.2.2-SNAPSHOT 4.0.0 diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java index b9c4e6c80..5c89d5096 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java @@ -11,13 +11,9 @@ import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.oaiIProvenance; import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.qualifier; import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.structuredProperty; import static eu.dnetlib.dhp.schema.common.ModelConstants.*; +import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_ACCESS_MODES; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Date; -import java.util.HashMap; -import java.util.List; -import java.util.Map; +import java.util.*; import org.apache.commons.lang3.StringUtils; import org.dom4j.Document; @@ -25,6 +21,7 @@ import org.dom4j.DocumentFactory; import org.dom4j.DocumentHelper; import org.dom4j.Node; +import eu.dnetlib.dhp.schema.common.LicenseComparator; import eu.dnetlib.dhp.schema.oaf.Author; import eu.dnetlib.dhp.schema.oaf.Context; import eu.dnetlib.dhp.schema.oaf.DataInfo; @@ -50,6 +47,10 @@ public abstract class AbstractMdRecordToOafMapper { protected static final String DATACITE_SCHEMA_KERNEL_4 = "http://datacite.org/schema/kernel-4"; protected static final String DATACITE_SCHEMA_KERNEL_3 = "http://datacite.org/schema/kernel-3"; + protected static final Qualifier ORCID_PID_TYPE = qualifier( + "ORCID", "Open Researcher and Contributor ID", DNET_PID_TYPES, DNET_PID_TYPES); + protected static final Qualifier MAG_PID_TYPE = qualifier( + "MAGIdentifier", "Microsoft Academic Graph Identifier", DNET_PID_TYPES, DNET_PID_TYPES); protected static final Map nsContext = new HashMap<>(); @@ -75,8 +76,7 @@ public abstract class AbstractMdRecordToOafMapper { DocumentFactory.getInstance().setXPathNamespaceURIs(nsContext); final Document doc = DocumentHelper - .parseText( - xml.replaceAll(DATACITE_SCHEMA_KERNEL_4, DATACITE_SCHEMA_KERNEL_3)); + .parseText(xml.replaceAll(DATACITE_SCHEMA_KERNEL_4, DATACITE_SCHEMA_KERNEL_3)); final String type = doc.valueOf("//dr:CobjCategory/@type"); final KeyValue collectedFrom = getProvenanceDatasource( @@ -103,7 +103,7 @@ public abstract class AbstractMdRecordToOafMapper { } } - private KeyValue getProvenanceDatasource(Document doc, String xpathId, String xpathName) { + private KeyValue getProvenanceDatasource(final Document doc, final String xpathId, final String xpathName) { final String dsId = doc.valueOf(xpathId); final String dsName = doc.valueOf(xpathName); @@ -111,9 +111,7 @@ public abstract class AbstractMdRecordToOafMapper { return null; } - return keyValue( - createOpenaireId(10, dsId, true), - dsName); + return keyValue(createOpenaireId(10, dsId, true), dsName); } protected List createOafs( @@ -211,8 +209,14 @@ public abstract class AbstractMdRecordToOafMapper { return res; } - protected Relation getRelation(String source, String target, String relType, String subRelType, String relClass, - KeyValue collectedFrom, DataInfo info, long lastUpdateTimestamp) { + protected Relation getRelation(final String source, + final String target, + final String relType, + final String subRelType, + final String relClass, + final KeyValue collectedFrom, + final DataInfo info, + final long lastUpdateTimestamp) { final Relation rel = new Relation(); rel.setRelType(relType); rel.setSubRelType(subRelType); @@ -269,7 +273,9 @@ public abstract class AbstractMdRecordToOafMapper { r.setCoverage(prepareCoverages(doc, info)); r.setContext(prepareContexts(doc, info)); r.setExternalReference(new ArrayList<>()); // NOT PRESENT IN MDSTORES - r.setInstance(prepareInstances(doc, info, collectedFrom, hostedBy)); + final List instances = prepareInstances(doc, info, collectedFrom, hostedBy); + r.setInstance(instances); + r.setBestaccessright(getBestAccessRights(instances)); } private List prepareContexts(final Document doc, final DataInfo info) { @@ -289,7 +295,10 @@ public abstract class AbstractMdRecordToOafMapper { protected abstract Qualifier prepareResourceType(Document doc, DataInfo info); protected abstract List prepareInstances( - Document doc, DataInfo info, KeyValue collectedfrom, KeyValue hostedby); + Document doc, + DataInfo info, + KeyValue collectedfrom, + KeyValue hostedby); protected abstract List> prepareSources(Document doc, DataInfo info); @@ -314,13 +323,16 @@ public abstract class AbstractMdRecordToOafMapper { protected abstract List prepareAuthors(Document doc, DataInfo info); protected abstract List> prepareOtherResearchProductTools( - Document doc, DataInfo info); + Document doc, + DataInfo info); protected abstract List> prepareOtherResearchProductContactGroups( - Document doc, DataInfo info); + Document doc, + DataInfo info); protected abstract List> prepareOtherResearchProductContactPersons( - Document doc, DataInfo info); + Document doc, + DataInfo info); protected abstract Qualifier prepareSoftwareProgrammingLanguage(Document doc, DataInfo info); @@ -329,7 +341,8 @@ public abstract class AbstractMdRecordToOafMapper { protected abstract List prepareSoftwareLicenses(Document doc, DataInfo info); protected abstract List> prepareSoftwareDocumentationUrls( - Document doc, DataInfo info); + Document doc, + DataInfo info); protected abstract List prepareDatasetGeoLocations(Document doc, DataInfo info); @@ -345,6 +358,34 @@ public abstract class AbstractMdRecordToOafMapper { protected abstract Field prepareDatasetStorageDate(Document doc, DataInfo info); + protected static Qualifier getBestAccessRights(List instanceList) { + if (instanceList != null) { + final Optional min = instanceList + .stream() + .map(i -> i.getAccessright()) + .min(new LicenseComparator()); + + final Qualifier rights = min.isPresent() ? min.get() : new Qualifier(); + + if (StringUtils.isBlank(rights.getClassid())) { + rights.setClassid(UNKNOWN); + } + if (StringUtils.isBlank(rights.getClassname()) + || UNKNOWN.equalsIgnoreCase(rights.getClassname())) { + rights.setClassname(NOT_AVAILABLE); + } + if (StringUtils.isBlank(rights.getSchemeid())) { + rights.setSchemeid(DNET_ACCESS_MODES); + } + if (StringUtils.isBlank(rights.getSchemename())) { + rights.setSchemename(DNET_ACCESS_MODES); + } + + return rights; + } + return null; + } + private Journal prepareJournal(final Document doc, final DataInfo info) { final Node n = doc.selectSingleNode("//oaf:journal"); if (n != null) { @@ -358,26 +399,17 @@ public abstract class AbstractMdRecordToOafMapper { final String vol = n.valueOf("@vol"); final String edition = n.valueOf("@edition"); if (StringUtils.isNotBlank(name)) { - return journal( - name, - issnPrinted, - issnOnline, - issnLinking, - ep, - iss, - sp, - vol, - edition, - null, - null, - info); + return journal(name, issnPrinted, issnOnline, issnLinking, ep, iss, sp, vol, edition, null, null, info); } } return null; } protected Qualifier prepareQualifier( - final Node node, final String xpath, final String schemeId, final String schemeName) { + final Node node, + final String xpath, + final String schemeId, + final String schemeName) { final String classId = node.valueOf(xpath); final String className = code2name.get(classId); return qualifier(classId, className, schemeId, schemeName); @@ -401,7 +433,10 @@ public abstract class AbstractMdRecordToOafMapper { } protected List prepareListStructProps( - final Node node, final String xpath, final Qualifier qualifier, final DataInfo info) { + final Node node, + final String xpath, + final Qualifier qualifier, + final DataInfo info) { final List res = new ArrayList<>(); for (final Object o : node.selectNodes(xpath)) { final Node n = (Node) o; @@ -411,19 +446,17 @@ public abstract class AbstractMdRecordToOafMapper { } protected List prepareListStructProps( - final Node node, final String xpath, final DataInfo info) { + final Node node, + final String xpath, + final DataInfo info) { final List res = new ArrayList<>(); for (final Object o : node.selectNodes(xpath)) { final Node n = (Node) o; res .add( structuredProperty( - n.getText(), - n.valueOf("@classid"), - n.valueOf("@classname"), - n.valueOf("@schemeid"), - n.valueOf("@schemename"), - info)); + n.getText(), n.valueOf("@classid"), n.valueOf("@classname"), n.valueOf("@schemeid"), + n.valueOf("@schemename"), info)); } return res; } @@ -449,8 +482,7 @@ public abstract class AbstractMdRecordToOafMapper { final Node n = doc.selectSingleNode("//oaf:datainfo"); if (n == null) { - return dataInfo( - false, null, false, false, REPOSITORY_PROVENANCE_ACTIONS, "0.9"); + return dataInfo(false, null, false, false, REPOSITORY_PROVENANCE_ACTIONS, "0.9"); } final String paClassId = n.valueOf("./oaf:provenanceaction/@classid"); @@ -464,12 +496,8 @@ public abstract class AbstractMdRecordToOafMapper { final String trust = n.valueOf("./oaf:trust"); return dataInfo( - deletedbyinference, - inferenceprovenance, - inferred, - false, - qualifier(paClassId, paClassName, paSchemeId, paSchemeName), - trust); + deletedbyinference, inferenceprovenance, inferred, false, + qualifier(paClassId, paClassName, paSchemeId, paSchemeName), trust); } protected Field prepareField(final Node node, final String xpath, final DataInfo info) { @@ -477,7 +505,9 @@ public abstract class AbstractMdRecordToOafMapper { } protected List> prepareListFields( - final Node node, final String xpath, final DataInfo info) { + final Node node, + final String xpath, + final DataInfo info) { return listFields(info, prepareListString(node, xpath)); } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java index ed09016da..53c0913c2 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java @@ -1,10 +1,19 @@ package eu.dnetlib.dhp.oa.graph.raw; -import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.*; -import static eu.dnetlib.dhp.schema.common.ModelConstants.*; +import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.createOpenaireId; +import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.field; +import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.structuredProperty; +import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_ACCESS_MODES; +import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_LANGUAGES; +import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_PUBLICATION_RESOURCE; +import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_RELATED_TO; +import static eu.dnetlib.dhp.schema.common.ModelConstants.PUBLICATION_DATASET; +import static eu.dnetlib.dhp.schema.common.ModelConstants.RESULT_RESULT; -import java.util.*; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; import java.util.stream.Collectors; import org.apache.commons.lang3.StringUtils; @@ -14,9 +23,16 @@ import org.dom4j.Node; import com.google.common.collect.Lists; -import eu.dnetlib.dhp.oa.graph.raw.common.PacePerson; -import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.common.PacePerson; +import eu.dnetlib.dhp.schema.oaf.Author; +import eu.dnetlib.dhp.schema.oaf.DataInfo; +import eu.dnetlib.dhp.schema.oaf.Field; +import eu.dnetlib.dhp.schema.oaf.GeoLocation; +import eu.dnetlib.dhp.schema.oaf.Instance; +import eu.dnetlib.dhp.schema.oaf.KeyValue; +import eu.dnetlib.dhp.schema.oaf.Oaf; +import eu.dnetlib.dhp.schema.oaf.Qualifier; +import eu.dnetlib.dhp.schema.oaf.StructuredProperty; public class OafToOafMapper extends AbstractMdRecordToOafMapper { @@ -39,14 +55,25 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper { author.setSurname(p.getNormalisedSurname()); } - final String pid = e.attributeValue("nameIdentifier"); - final String pidType = e.attributeValue("nameIdentifierScheme"); + final String pid = e.valueOf("./@nameIdentifier"); + final String type = e + .valueOf("./@nameIdentifierScheme") + .trim() + .toUpperCase() + .replaceAll(" ", "") + .replaceAll("_", ""); author.setPid(new ArrayList<>()); - if (StringUtils.isNotBlank(pid) && StringUtils.isNotBlank(pidType)) { - author - .getPid() - .add(structuredProperty(pid, qualifier(pidType, pidType, DNET_PID_TYPES, DNET_PID_TYPES), info)); + + if (StringUtils.isNotBlank(pid)) { + if (type.startsWith("ORCID")) { + final String cleanedId = pid + .replaceAll("http://orcid.org/", "") + .replaceAll("https://orcid.org/", ""); + author.getPid().add(structuredProperty(cleanedId, ORCID_PID_TYPE, info)); + } else if (type.startsWith("MAGID")) { + author.getPid().add(structuredProperty(pid, MAG_PID_TYPE, info)); + } } res.add(author); @@ -104,28 +131,21 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper { final Instance instance = new Instance(); instance .setInstancetype( - prepareQualifier( - doc, - "//dr:CobjCategory", - DNET_PUBLICATION_RESOURCE, - DNET_PUBLICATION_RESOURCE)); + prepareQualifier(doc, "//dr:CobjCategory", DNET_PUBLICATION_RESOURCE, DNET_PUBLICATION_RESOURCE)); instance.setCollectedfrom(collectedfrom); instance.setHostedby(hostedby); instance.setDateofacceptance(field(doc.valueOf("//oaf:dateAccepted"), info)); instance.setDistributionlocation(doc.valueOf("//oaf:distributionlocation")); instance - .setAccessright( - prepareQualifier(doc, "//oaf:accessrights", DNET_ACCESS_MODES, DNET_ACCESS_MODES)); + .setAccessright(prepareQualifier(doc, "//oaf:accessrights", DNET_ACCESS_MODES, DNET_ACCESS_MODES)); instance.setLicense(field(doc.valueOf("//oaf:license"), info)); instance.setRefereed(field(doc.valueOf("//oaf:refereed"), info)); instance - .setProcessingchargeamount( - field(doc.valueOf("//oaf:processingchargeamount"), info)); + .setProcessingchargeamount(field(doc.valueOf("//oaf:processingchargeamount"), info)); instance - .setProcessingchargecurrency( - field(doc.valueOf("//oaf:processingchargeamount/@currency"), info)); + .setProcessingchargecurrency(field(doc.valueOf("//oaf:processingchargeamount/@currency"), info)); - List nodes = Lists.newArrayList(doc.selectNodes("//dc:identifier")); + final List nodes = Lists.newArrayList(doc.selectNodes("//dc:identifier")); instance .setUrl( nodes @@ -158,19 +178,22 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper { @Override protected Field prepareSoftwareCodeRepositoryUrl( - final Document doc, final DataInfo info) { + final Document doc, + final DataInfo info) { return null; // NOT PRESENT IN OAF } @Override protected List prepareSoftwareLicenses( - final Document doc, final DataInfo info) { + final Document doc, + final DataInfo info) { return new ArrayList<>(); // NOT PRESENT IN OAF } @Override protected List> prepareSoftwareDocumentationUrls( - final Document doc, final DataInfo info) { + final Document doc, + final DataInfo info) { return new ArrayList<>(); // NOT PRESENT IN OAF } @@ -182,13 +205,15 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper { @Override protected Field prepareDatasetMetadataVersionNumber( - final Document doc, final DataInfo info) { + final Document doc, + final DataInfo info) { return null; // NOT PRESENT IN OAF } @Override protected Field prepareDatasetLastMetadataUpdate( - final Document doc, final DataInfo info) { + final Document doc, + final DataInfo info) { return null; // NOT PRESENT IN OAF } @@ -216,19 +241,22 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper { @Override protected List> prepareOtherResearchProductTools( - final Document doc, final DataInfo info) { + final Document doc, + final DataInfo info) { return new ArrayList<>(); // NOT PRESENT IN OAF } @Override protected List> prepareOtherResearchProductContactGroups( - final Document doc, final DataInfo info) { + final Document doc, + final DataInfo info) { return new ArrayList<>(); // NOT PRESENT IN OAF } @Override protected List> prepareOtherResearchProductContactPersons( - final Document doc, final DataInfo info) { + final Document doc, + final DataInfo info) { return new ArrayList<>(); // NOT PRESENT IN OAF } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java index 30b980c42..b9a159917 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java @@ -4,16 +4,31 @@ package eu.dnetlib.dhp.oa.graph.raw; import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.createOpenaireId; import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.field; import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.structuredProperty; -import static eu.dnetlib.dhp.schema.common.ModelConstants.*; +import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_ACCESS_MODES; +import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_DATA_CITE_DATE; +import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_DATA_CITE_RESOURCE; +import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_LANGUAGES; +import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_PUBLICATION_RESOURCE; +import static eu.dnetlib.dhp.schema.common.ModelConstants.HAS_PARTS; +import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_PART_OF; +import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_SUPPLEMENTED_BY; +import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_SUPPLEMENT_TO; +import static eu.dnetlib.dhp.schema.common.ModelConstants.PART; +import static eu.dnetlib.dhp.schema.common.ModelConstants.RESULT_RESULT; +import static eu.dnetlib.dhp.schema.common.ModelConstants.SUPPLEMENT; -import java.util.*; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; import org.apache.commons.lang3.StringUtils; import org.dom4j.Document; import org.dom4j.Node; -import eu.dnetlib.dhp.oa.graph.raw.common.PacePerson; -import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.common.PacePerson; import eu.dnetlib.dhp.schema.oaf.Author; import eu.dnetlib.dhp.schema.oaf.DataInfo; import eu.dnetlib.dhp.schema.oaf.Field; @@ -22,7 +37,6 @@ import eu.dnetlib.dhp.schema.oaf.Instance; import eu.dnetlib.dhp.schema.oaf.KeyValue; import eu.dnetlib.dhp.schema.oaf.Oaf; import eu.dnetlib.dhp.schema.oaf.Qualifier; -import eu.dnetlib.dhp.schema.oaf.Relation; import eu.dnetlib.dhp.schema.oaf.StructuredProperty; public class OdfToOafMapper extends AbstractMdRecordToOafMapper { @@ -48,7 +62,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { final String fullname = n.valueOf("./datacite:creatorName"); author.setFullname(fullname); - PacePerson pp = new PacePerson(fullname, false); + final PacePerson pp = new PacePerson(fullname, false); final String name = n.valueOf("./datacite:givenName"); if (StringUtils.isBlank(name) & pp.isAccurate()) { author.setName(pp.getNormalisedFirstName()); @@ -63,6 +77,10 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { author.setSurname(surname); } + if (StringUtils.isBlank(author.getFullname())) { + author.setFullname(String.format("%s, %s", author.getSurname(), author.getName())); + } + author.setAffiliation(prepareListFields(n, "./datacite:affiliation", info)); author.setPid(preparePids(n, info)); author.setRank(pos++); @@ -74,13 +92,21 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { private List preparePids(final Node n, final DataInfo info) { final List res = new ArrayList<>(); for (final Object o : n.selectNodes("./datacite:nameIdentifier")) { - res - .add( - structuredProperty( - ((Node) o).getText(), - prepareQualifier( - (Node) o, "./@nameIdentifierScheme", DNET_PID_TYPES, DNET_PID_TYPES), - info)); + + final String id = ((Node) o).getText(); + final String type = ((Node) o) + .valueOf("./@nameIdentifierScheme") + .trim() + .toUpperCase() + .replaceAll(" ", "") + .replaceAll("_", ""); + + if (type.startsWith("ORCID")) { + final String cleanedId = id.replaceAll("http://orcid.org/", "").replaceAll("https://orcid.org/", ""); + res.add(structuredProperty(cleanedId, ORCID_PID_TYPE, info)); + } else if (type.startsWith("MAGID")) { + res.add(structuredProperty(id, MAG_PID_TYPE, info)); + } } return res; } @@ -95,21 +121,18 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { final Instance instance = new Instance(); instance .setInstancetype( - prepareQualifier( - doc, "//dr:CobjCategory", DNET_PUBLICATION_RESOURCE, DNET_PUBLICATION_RESOURCE)); + prepareQualifier(doc, "//dr:CobjCategory", DNET_PUBLICATION_RESOURCE, DNET_PUBLICATION_RESOURCE)); instance.setCollectedfrom(collectedfrom); instance.setHostedby(hostedby); instance.setDateofacceptance(field(doc.valueOf("//oaf:dateAccepted"), info)); instance.setDistributionlocation(doc.valueOf("//oaf:distributionlocation")); instance - .setAccessright( - prepareQualifier(doc, "//oaf:accessrights", DNET_ACCESS_MODES, DNET_ACCESS_MODES)); + .setAccessright(prepareQualifier(doc, "//oaf:accessrights", DNET_ACCESS_MODES, DNET_ACCESS_MODES)); instance.setLicense(field(doc.valueOf("//oaf:license"), info)); instance.setRefereed(field(doc.valueOf("//oaf:refereed"), info)); instance.setProcessingchargeamount(field(doc.valueOf("//oaf:processingchargeamount"), info)); instance - .setProcessingchargecurrency( - field(doc.valueOf("//oaf:processingchargeamount/@currency"), info)); + .setProcessingchargecurrency(field(doc.valueOf("//oaf:processingchargeamount/@currency"), info)); final Set url = new HashSet<>(); for (final Object o : doc.selectNodes("//datacite:alternateIdentifier[@alternateIdentifierType='URL']")) { @@ -149,11 +172,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { res .add( structuredProperty( - ((Node) o).getText(), - "UNKNOWN", - "UNKNOWN", - DNET_DATA_CITE_DATE, - DNET_DATA_CITE_DATE, + ((Node) o).getText(), "UNKNOWN", "UNKNOWN", DNET_DATA_CITE_DATE, DNET_DATA_CITE_DATE, info)); } } @@ -197,53 +216,52 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { @Override protected List> prepareOtherResearchProductTools( - final Document doc, final DataInfo info) { + final Document doc, + final DataInfo info) { return new ArrayList<>(); // Not present in ODF ??? } @Override protected List> prepareOtherResearchProductContactGroups( - final Document doc, final DataInfo info) { + final Document doc, + final DataInfo info) { return prepareListFields( - doc, - "//datacite:contributor[@contributorType='ContactGroup']/datacite:contributorName", - info); + doc, "//datacite:contributor[@contributorType='ContactGroup']/datacite:contributorName", info); } @Override protected List> prepareOtherResearchProductContactPersons( - final Document doc, final DataInfo info) { + final Document doc, + final DataInfo info) { return prepareListFields( - doc, - "//datacite:contributor[@contributorType='ContactPerson']/datacite:contributorName", - info); + doc, "//datacite:contributor[@contributorType='ContactPerson']/datacite:contributorName", info); } @Override protected Qualifier prepareSoftwareProgrammingLanguage(final Document doc, final DataInfo info) { - return prepareQualifier( - doc, "//datacite:format", "dnet:programming_languages", "dnet:programming_languages"); + return prepareQualifier(doc, "//datacite:format", "dnet:programming_languages", "dnet:programming_languages"); } @Override protected Field prepareSoftwareCodeRepositoryUrl( - final Document doc, final DataInfo info) { + final Document doc, + final DataInfo info) { return null; // Not present in ODF ??? } @Override protected List prepareSoftwareLicenses( - final Document doc, final DataInfo info) { + final Document doc, + final DataInfo info) { return new ArrayList<>(); // Not present in ODF ??? } @Override protected List> prepareSoftwareDocumentationUrls( - final Document doc, final DataInfo info) { + final Document doc, + final DataInfo info) { return prepareListFields( - doc, - "//datacite:relatedIdentifier[@relatedIdentifierType='URL' and @relationType='IsDocumentedBy']", - info); + doc, "//datacite:relatedIdentifier[@relatedIdentifierType='URL' and @relationType='IsDocumentedBy']", info); } // DATASETS @@ -264,13 +282,15 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { @Override protected Field prepareDatasetMetadataVersionNumber( - final Document doc, final DataInfo info) { + final Document doc, + final DataInfo info) { return null; // Not present in ODF ??? } @Override protected Field prepareDatasetLastMetadataUpdate( - final Document doc, final DataInfo info) { + final Document doc, + final DataInfo info) { return prepareField(doc, "//datacite:date[@dateType='Updated']", info); } @@ -346,9 +366,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { @Override protected Qualifier prepareResourceType(final Document doc, final DataInfo info) { return prepareQualifier( - doc, - "//*[local-name() = 'resource']//*[local-name() = 'resourceType']", - DNET_DATA_CITE_RESOURCE, + doc, "//*[local-name() = 'resource']//*[local-name() = 'resourceType']", DNET_DATA_CITE_RESOURCE, DNET_DATA_CITE_RESOURCE); } } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java index 5a006e351..b9da9fb29 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java @@ -21,7 +21,14 @@ import org.mockito.Mock; import org.mockito.junit.jupiter.MockitoExtension; import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.Author; +import eu.dnetlib.dhp.schema.oaf.Dataset; +import eu.dnetlib.dhp.schema.oaf.Field; +import eu.dnetlib.dhp.schema.oaf.Oaf; +import eu.dnetlib.dhp.schema.oaf.Publication; +import eu.dnetlib.dhp.schema.oaf.Relation; +import eu.dnetlib.dhp.schema.oaf.Software; +import eu.dnetlib.dhp.schema.oaf.StructuredProperty; @ExtendWith(MockitoExtension.class) public class MappersTest { @@ -54,13 +61,13 @@ public class MappersTest { assertTrue(StringUtils.isNotBlank(p.getTitle().get(0).getValue())); assertTrue(p.getAuthor().size() > 0); - Optional author = p + final Optional author = p .getAuthor() .stream() .filter(a -> a.getPid() != null && !a.getPid().isEmpty()) .findFirst(); assertTrue(author.isPresent()); - StructuredProperty pid = author + final StructuredProperty pid = author .get() .getPid() .stream() @@ -68,7 +75,7 @@ public class MappersTest { .get(); assertEquals("0000-0001-6651-1178", pid.getValue()); assertEquals("ORCID", pid.getQualifier().getClassid()); - assertEquals("ORCID", pid.getQualifier().getClassname()); + assertEquals("Open Researcher and Contributor ID", pid.getQualifier().getClassname()); assertEquals(ModelConstants.DNET_PID_TYPES, pid.getQualifier().getSchemeid()); assertEquals(ModelConstants.DNET_PID_TYPES, pid.getQualifier().getSchemename()); assertEquals("Votsi,Nefta", author.get().getFullname()); @@ -78,8 +85,19 @@ public class MappersTest { assertTrue(p.getSubject().size() > 0); assertTrue(StringUtils.isNotBlank(p.getJournal().getIssnOnline())); assertTrue(StringUtils.isNotBlank(p.getJournal().getName())); - assertTrue(p.getInstance().size() > 0); + assertNotNull(p.getInstance()); + assertTrue(p.getInstance().size() > 0); + p + .getInstance() + .stream() + .forEach(i -> { + assertNotNull(i.getAccessright()); + assertEquals("OPEN", i.getAccessright().getClassid()); + }); + + assertNotNull(p.getBestaccessright()); + assertEquals("OPEN", p.getBestaccessright().getClassid()); assertValidId(r1.getSource()); assertValidId(r1.getTarget()); assertValidId(r2.getSource()); @@ -121,13 +139,13 @@ public class MappersTest { assertTrue(StringUtils.isNotBlank(d.getTitle().get(0).getValue())); assertTrue(d.getAuthor().size() > 0); - Optional author = d + final Optional author = d .getAuthor() .stream() .filter(a -> a.getPid() != null && !a.getPid().isEmpty()) .findFirst(); assertTrue(author.isPresent()); - StructuredProperty pid = author + final StructuredProperty pid = author .get() .getPid() .stream() @@ -135,7 +153,7 @@ public class MappersTest { .get(); assertEquals("0000-0001-9074-1619", pid.getValue()); assertEquals("ORCID", pid.getQualifier().getClassid()); - assertEquals("ORCID", pid.getQualifier().getClassname()); + assertEquals("Open Researcher and Contributor ID", pid.getQualifier().getClassname()); assertEquals(ModelConstants.DNET_PID_TYPES, pid.getQualifier().getSchemeid()); assertEquals(ModelConstants.DNET_PID_TYPES, pid.getQualifier().getSchemename()); assertEquals("Baracchini, Theo", author.get().getFullname()); @@ -143,13 +161,13 @@ public class MappersTest { assertEquals("Theo", author.get().getName()); assertEquals(1, author.get().getAffiliation().size()); - Optional> opAff = author + final Optional> opAff = author .get() .getAffiliation() .stream() .findFirst(); assertTrue(opAff.isPresent()); - Field affiliation = opAff.get(); + final Field affiliation = opAff.get(); assertEquals("ISTI-CNR", affiliation.getValue()); assertTrue(d.getSubject().size() > 0); @@ -157,6 +175,16 @@ public class MappersTest { assertTrue(d.getContext().size() > 0); assertTrue(d.getContext().get(0).getId().length() > 0); + assertNotNull(d.getInstance()); + assertTrue(d.getInstance().size() > 0); + d + .getInstance() + .stream() + .forEach(i -> { + assertNotNull(i.getAccessright()); + assertEquals("OPEN", i.getAccessright().getClassid()); + }); + assertValidId(r1.getSource()); assertValidId(r1.getTarget()); assertValidId(r2.getSource()); diff --git a/dhp-workflows/dhp-graph-provision-scholexplorer/pom.xml b/dhp-workflows/dhp-graph-provision-scholexplorer/pom.xml index e0ee03660..e0ce739cf 100644 --- a/dhp-workflows/dhp-graph-provision-scholexplorer/pom.xml +++ b/dhp-workflows/dhp-graph-provision-scholexplorer/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.1-SNAPSHOT + 1.2.2-SNAPSHOT 4.0.0 diff --git a/dhp-workflows/dhp-graph-provision/pom.xml b/dhp-workflows/dhp-graph-provision/pom.xml index 39699b3b6..62bf7186c 100644 --- a/dhp-workflows/dhp-graph-provision/pom.xml +++ b/dhp-workflows/dhp-graph-provision/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.1-SNAPSHOT + 1.2.2-SNAPSHOT 4.0.0 diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJob.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJob.java index dbdc54fc0..72d68a389 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJob.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJob.java @@ -3,7 +3,9 @@ package eu.dnetlib.dhp.oa.provision; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; +import java.util.HashSet; import java.util.Optional; +import java.util.Set; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; @@ -19,8 +21,10 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.base.Splitter; import com.google.common.collect.Iterables; import com.google.common.collect.Iterators; +import com.google.common.collect.Sets; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.common.HdfsSupport; @@ -58,6 +62,8 @@ public class PrepareRelationsJob { public static final int MAX_RELS = 100; + public static final int DEFAULT_NUM_PARTITIONS = 3000; + public static void main(String[] args) throws Exception { String jsonConfiguration = IOUtils .toString( @@ -79,6 +85,24 @@ public class PrepareRelationsJob { String outputPath = parser.get("outputPath"); log.info("outputPath: {}", outputPath); + int relPartitions = Optional + .ofNullable(parser.get("relPartitions")) + .map(Integer::valueOf) + .orElse(DEFAULT_NUM_PARTITIONS); + log.info("relPartitions: {}", relPartitions); + + Set relationFilter = Optional + .ofNullable(parser.get("relationFilter")) + .map(s -> Sets.newHashSet(Splitter.on(",").split(s))) + .orElse(new HashSet<>()); + log.info("relationFilter: {}", relationFilter); + + int maxRelations = Optional + .ofNullable(parser.get("maxRelations")) + .map(Integer::valueOf) + .orElse(MAX_RELS); + log.info("maxRelations: {}", maxRelations); + SparkConf conf = new SparkConf(); runWithSparkSession( @@ -86,25 +110,74 @@ public class PrepareRelationsJob { isSparkSessionManaged, spark -> { removeOutputDir(spark, outputPath); - prepareRelationsFromPaths(spark, inputRelationsPath, outputPath); + prepareRelationsRDD( + spark, inputRelationsPath, outputPath, relationFilter, relPartitions, maxRelations); }); } - private static void prepareRelationsFromPaths( - SparkSession spark, String inputRelationsPath, String outputPath) { + /** + * Dataset based implementation that prepares the graph relations by limiting the number of outgoing links and + * filtering the relation types according to the given criteria. + * + * @param spark the spark session + * @param inputRelationsPath source path for the graph relations + * @param outputPath output path for the processed relations + * @param relationFilter set of relation filters applied to the `relClass` field + * @param maxRelations maximum number of allowed outgoing edges + */ + private static void prepareRelations( + SparkSession spark, String inputRelationsPath, String outputPath, Set relationFilter, + int maxRelations) { readPathRelation(spark, inputRelationsPath) .filter("dataInfo.deletedbyinference == false") + .filter((FilterFunction) rel -> !relationFilter.contains(rel.getRelClass())) .groupByKey( (MapFunction) value -> value.getSource(), Encoders.STRING()) .flatMapGroups( (FlatMapGroupsFunction) (key, values) -> Iterators - .limit(values, MAX_RELS), + .limit(values, maxRelations), Encoders.bean(SortableRelation.class)) .write() .mode(SaveMode.Overwrite) .parquet(outputPath); } + /** + * RDD based implementation that prepares the graph relations by limiting the number of outgoing links and filtering + * the relation types according to the given criteria. Moreover, outgoing links kept within the given limit are + * prioritized according to the weights indicated in eu.dnetlib.dhp.oa.provision.model.SortableRelation. + * + * @param spark the spark session + * @param inputRelationsPath source path for the graph relations + * @param outputPath output path for the processed relations + * @param relationFilter set of relation filters applied to the `relClass` field + * @param maxRelations maximum number of allowed outgoing edges + */ + // TODO work in progress + private static void prepareRelationsRDD( + SparkSession spark, String inputRelationsPath, String outputPath, Set relationFilter, int relPartitions, + int maxRelations) { + JavaRDD rels = readPathRelationRDD(spark, inputRelationsPath).repartition(relPartitions); + RelationPartitioner partitioner = new RelationPartitioner(rels.getNumPartitions()); + + // only consider those that are not virtually deleted + RDD d = rels + .filter(rel -> !rel.getDataInfo().getDeletedbyinference()) + .filter(rel -> !relationFilter.contains(rel.getRelClass())) + .mapToPair( + (PairFunction) rel -> new Tuple2<>(rel, rel)) + .groupByKey(partitioner) + .map(group -> Iterables.limit(group._2(), maxRelations)) + .flatMap(group -> group.iterator()) + .rdd(); + + spark + .createDataset(d, Encoders.bean(SortableRelation.class)) + .write() + .mode(SaveMode.Overwrite) + .parquet(outputPath); + } + /** * Reads a Dataset of eu.dnetlib.dhp.oa.provision.model.SortableRelation objects from a newline delimited json text * file, @@ -123,31 +196,6 @@ public class PrepareRelationsJob { Encoders.bean(SortableRelation.class)); } - // TODO work in progress - private static void prepareRelationsRDDFromPaths( - SparkSession spark, String inputRelationsPath, String outputPath, int numPartitions) { - JavaRDD rels = readPathRelationRDD(spark, inputRelationsPath).repartition(numPartitions); - - RDD d = rels - .filter(rel -> !rel.getDataInfo().getDeletedbyinference()) // only - // consider - // those - // that are not virtually - // deleted - .mapToPair( - (PairFunction) rel -> new Tuple2<>(rel, rel)) - .groupByKey(new RelationPartitioner(rels.getNumPartitions())) - .map(p -> Iterables.limit(p._2(), MAX_RELS)) - .flatMap(p -> p.iterator()) - .rdd(); - - spark - .createDataset(d, Encoders.bean(SortableRelation.class)) - .write() - .mode(SaveMode.Overwrite) - .parquet(outputPath); - } - private static JavaRDD readPathRelationRDD( SparkSession spark, final String inputPath) { JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/SortableRelation.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/SortableRelation.java index 7c866001b..b6571b9bf 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/SortableRelation.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/SortableRelation.java @@ -16,10 +16,10 @@ public class SortableRelation extends Relation implements Comparable, static { weights.put("outcome", 0); weights.put("supplement", 1); - weights.put("publicationDataset", 2); + weights.put("affiliation", 2); weights.put("relationship", 3); - weights.put("similarity", 4); - weights.put("affiliation", 5); + weights.put("publicationDataset", 4); + weights.put("similarity", 5); weights.put("provision", 6); weights.put("participation", 7); diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/TemplateFactory.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/TemplateFactory.java index 6cb025b4f..21b526ab1 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/TemplateFactory.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/TemplateFactory.java @@ -5,6 +5,7 @@ import static eu.dnetlib.dhp.oa.provision.utils.GraphMappingUtils.removePrefix; import static eu.dnetlib.dhp.oa.provision.utils.XmlSerializationUtils.escapeXml; import java.io.IOException; +import java.util.ArrayList; import java.util.Collection; import java.util.List; import java.util.stream.Collectors; @@ -95,7 +96,7 @@ public class TemplateFactory { .add("metadata", instancemetadata) .add( "webresources", - webresources + (webresources != null ? webresources : new ArrayList()) .stream() .filter(StringUtils::isNotBlank) .map(w -> getWebResource(w)) diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java index ce1c71312..f99298130 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java @@ -174,6 +174,7 @@ public class XmlRecordFactory implements Serializable { entity .getCollectedfrom() .stream() + .filter(XmlRecordFactory::kvNotBlank) .map(kv -> XmlSerializationUtils.mapKeyValue("collectedfrom", kv)) .collect(Collectors.toList())); } @@ -183,6 +184,7 @@ public class XmlRecordFactory implements Serializable { entity .getOriginalId() .stream() + .filter(Objects::nonNull) .map(s -> XmlSerializationUtils.asXmlElement("originalId", s)) .collect(Collectors.toList())); } @@ -192,6 +194,7 @@ public class XmlRecordFactory implements Serializable { entity .getPid() .stream() + .filter(Objects::nonNull) .map(p -> XmlSerializationUtils.mapStructuredProperty("pid", p)) .collect(Collectors.toList())); } @@ -213,6 +216,7 @@ public class XmlRecordFactory implements Serializable { r .getTitle() .stream() + .filter(Objects::nonNull) .map(t -> XmlSerializationUtils.mapStructuredProperty("title", t)) .collect(Collectors.toList())); } @@ -225,6 +229,7 @@ public class XmlRecordFactory implements Serializable { r .getAuthor() .stream() + .filter(Objects::nonNull) .map( a -> { final StringBuilder sb = new StringBuilder(" isNotBlank(sp.getQualifier().getClassid()) && isNotBlank(sp.getValue())) + .collect( + Collectors + .toMap( + p -> getAuthorPidType(p.getQualifier().getClassid()), + p -> p, + (p1, p2) -> p1)) + .values() .forEach( sp -> { - String pidType = XmlSerializationUtils - .escapeXml( - sp.getQualifier().getClassid()) - .replaceAll("\\W", ""); + String pidType = getAuthorPidType(sp.getQualifier().getClassid()); String pidValue = XmlSerializationUtils.escapeXml(sp.getValue()); - // ugly hack: some records - // provide swapped pidtype and - // pidvalue + // ugly hack: some records provide swapped pidtype and pidvalue if (authorPidTypes.contains(pidValue.toLowerCase().trim())) { sb.append(String.format(" %s=\"%s\"", pidValue, pidType)); } else { - pidType = pidType.replaceAll("\\W", "").replaceAll("\\d", ""); if (isNotBlank(pidType)) { sb .append( @@ -285,6 +292,7 @@ public class XmlRecordFactory implements Serializable { r .getContributor() .stream() + .filter(Objects::nonNull) .map(c -> XmlSerializationUtils.asXmlElement("contributor", c.getValue())) .collect(Collectors.toList())); } @@ -294,6 +302,7 @@ public class XmlRecordFactory implements Serializable { r .getCountry() .stream() + .filter(Objects::nonNull) .map(c -> XmlSerializationUtils.mapQualifier("country", c)) .collect(Collectors.toList())); } @@ -303,6 +312,7 @@ public class XmlRecordFactory implements Serializable { r .getCoverage() .stream() + .filter(Objects::nonNull) .map(c -> XmlSerializationUtils.asXmlElement("coverage", c.getValue())) .collect(Collectors.toList())); } @@ -319,6 +329,7 @@ public class XmlRecordFactory implements Serializable { r .getDescription() .stream() + .filter(Objects::nonNull) .map(c -> XmlSerializationUtils.asXmlElement("description", c.getValue())) .collect(Collectors.toList())); } @@ -333,6 +344,7 @@ public class XmlRecordFactory implements Serializable { r .getSubject() .stream() + .filter(Objects::nonNull) .map(s -> XmlSerializationUtils.mapStructuredProperty("subject", s)) .collect(Collectors.toList())); } @@ -345,6 +357,7 @@ public class XmlRecordFactory implements Serializable { r .getRelevantdate() .stream() + .filter(Objects::nonNull) .map(s -> XmlSerializationUtils.mapStructuredProperty("relevantdate", s)) .collect(Collectors.toList())); } @@ -357,6 +370,7 @@ public class XmlRecordFactory implements Serializable { r .getSource() .stream() + .filter(Objects::nonNull) .map(c -> XmlSerializationUtils.asXmlElement("source", c.getValue())) .collect(Collectors.toList())); } @@ -366,6 +380,7 @@ public class XmlRecordFactory implements Serializable { r .getFormat() .stream() + .filter(Objects::nonNull) .map(c -> XmlSerializationUtils.asXmlElement("format", c.getValue())) .collect(Collectors.toList())); } @@ -429,6 +444,7 @@ public class XmlRecordFactory implements Serializable { orp .getContactperson() .stream() + .filter(Objects::nonNull) .map(c -> XmlSerializationUtils.asXmlElement("contactperson", c.getValue())) .collect(Collectors.toList())); } @@ -439,6 +455,7 @@ public class XmlRecordFactory implements Serializable { orp .getContactgroup() .stream() + .filter(Objects::nonNull) .map(c -> XmlSerializationUtils.asXmlElement("contactgroup", c.getValue())) .collect(Collectors.toList())); } @@ -448,6 +465,7 @@ public class XmlRecordFactory implements Serializable { orp .getTool() .stream() + .filter(Objects::nonNull) .map(c -> XmlSerializationUtils.asXmlElement("tool", c.getValue())) .collect(Collectors.toList())); } @@ -461,6 +479,7 @@ public class XmlRecordFactory implements Serializable { s .getDocumentationUrl() .stream() + .filter(Objects::nonNull) .map(c -> XmlSerializationUtils.asXmlElement("documentationUrl", c.getValue())) .collect(Collectors.toList())); } @@ -470,6 +489,7 @@ public class XmlRecordFactory implements Serializable { s .getLicense() .stream() + .filter(Objects::nonNull) .map(l -> XmlSerializationUtils.mapStructuredProperty("license", l)) .collect(Collectors.toList())); } @@ -576,6 +596,7 @@ public class XmlRecordFactory implements Serializable { ds .getOdlanguages() .stream() + .filter(Objects::nonNull) .map(c -> XmlSerializationUtils.asXmlElement("odlanguages", c.getValue())) .collect(Collectors.toList())); } @@ -585,6 +606,7 @@ public class XmlRecordFactory implements Serializable { ds .getOdcontenttypes() .stream() + .filter(Objects::nonNull) .map(c -> XmlSerializationUtils.asXmlElement("odcontenttypes", c.getValue())) .collect(Collectors.toList())); } @@ -697,6 +719,7 @@ public class XmlRecordFactory implements Serializable { ds .getPolicies() .stream() + .filter(XmlRecordFactory::kvNotBlank) .map(kv -> XmlSerializationUtils.mapKeyValue("policies", kv)) .collect(Collectors.toList())); } @@ -709,6 +732,7 @@ public class XmlRecordFactory implements Serializable { ds .getSubjects() .stream() + .filter(Objects::nonNull) .map(sp -> XmlSerializationUtils.mapStructuredProperty("subjects", sp)) .collect(Collectors.toList())); } @@ -735,6 +759,7 @@ public class XmlRecordFactory implements Serializable { o .getAlternativeNames() .stream() + .filter(Objects::nonNull) .map(c -> XmlSerializationUtils.asXmlElement("alternativeNames", c.getValue())) .collect(Collectors.toList())); } @@ -744,7 +769,7 @@ public class XmlRecordFactory implements Serializable { XmlSerializationUtils.asXmlElement("websiteurl", o.getWebsiteurl().getValue())); } if (o.getLogourl() != null) { - metadata.add(XmlSerializationUtils.asXmlElement("websiteurl", o.getLogourl().getValue())); + metadata.add(XmlSerializationUtils.asXmlElement("logourl", o.getLogourl().getValue())); } if (o.getEclegalbody() != null) { @@ -776,13 +801,13 @@ public class XmlRecordFactory implements Serializable { .asXmlElement( "echighereducation", o.getEchighereducation().getValue())); } - if (o.getEcinternationalorganization() != null) { + if (o.getEcinternationalorganizationeurinterests() != null) { metadata .add( XmlSerializationUtils .asXmlElement( "ecinternationalorganizationeurinterests", - o.getEcinternationalorganization().getValue())); + o.getEcinternationalorganizationeurinterests().getValue())); } if (o.getEcinternationalorganization() != null) { metadata @@ -862,6 +887,7 @@ public class XmlRecordFactory implements Serializable { p .getSubjects() .stream() + .filter(Objects::nonNull) .map(sp -> XmlSerializationUtils.mapStructuredProperty("subject", sp)) .collect(Collectors.toList())); } @@ -912,7 +938,12 @@ public class XmlRecordFactory implements Serializable { if (p.getFundingtree() != null) { metadata .addAll( - p.getFundingtree().stream().map(ft -> ft.getValue()).collect(Collectors.toList())); + p + .getFundingtree() + .stream() + .filter(Objects::nonNull) + .map(ft -> ft.getValue()) + .collect(Collectors.toList())); } break; @@ -923,6 +954,17 @@ public class XmlRecordFactory implements Serializable { return metadata; } + private String getAuthorPidType(String s) { + return XmlSerializationUtils + .escapeXml(s) + .replaceAll("\\W", "") + .replaceAll("\\d", ""); + } + + private static boolean kvNotBlank(KeyValue kv) { + return kv != null && StringUtils.isNotBlank(kv.getKey()) && StringUtils.isNotBlank(kv.getValue()); + } + private void mapDatasourceType(List metadata, final Qualifier dsType) { metadata.add(XmlSerializationUtils.mapQualifier("datasourcetype", dsType)); @@ -960,7 +1002,7 @@ public class XmlRecordFactory implements Serializable { .add( XmlSerializationUtils.asXmlElement("coderepositoryurl", re.getCodeRepositoryUrl())); } - if (re.getResulttype() != null & re.getResulttype().isBlank()) { + if (re.getResulttype() != null && re.getResulttype().isBlank()) { metadata.add(XmlSerializationUtils.mapQualifier("resulttype", re.getResulttype())); } if (re.getCollectedfrom() != null) { @@ -969,6 +1011,7 @@ public class XmlRecordFactory implements Serializable { re .getCollectedfrom() .stream() + .filter(XmlRecordFactory::kvNotBlank) .map(kv -> XmlSerializationUtils.mapKeyValue("collectedfrom", kv)) .collect(Collectors.toList())); } @@ -986,10 +1029,10 @@ public class XmlRecordFactory implements Serializable { if (isNotBlank(re.getOfficialname())) { metadata.add(XmlSerializationUtils.asXmlElement("officialname", re.getOfficialname())); } - if (re.getDatasourcetype() != null & !re.getDatasourcetype().isBlank()) { + if (re.getDatasourcetype() != null && !re.getDatasourcetype().isBlank()) { mapDatasourceType(metadata, re.getDatasourcetype()); } - if (re.getOpenairecompatibility() != null & !re.getOpenairecompatibility().isBlank()) { + if (re.getOpenairecompatibility() != null && !re.getOpenairecompatibility().isBlank()) { metadata .add( XmlSerializationUtils @@ -1006,7 +1049,7 @@ public class XmlRecordFactory implements Serializable { .add( XmlSerializationUtils.asXmlElement("legalshortname", re.getLegalshortname())); } - if (re.getCountry() != null & !re.getCountry().isBlank()) { + if (re.getCountry() != null && !re.getCountry().isBlank()) { metadata.add(XmlSerializationUtils.mapQualifier("country", re.getCountry())); } break; @@ -1020,10 +1063,10 @@ public class XmlRecordFactory implements Serializable { if (isNotBlank(re.getAcronym())) { metadata.add(XmlSerializationUtils.asXmlElement("acronym", re.getAcronym())); } - if (re.getContracttype() != null & !re.getContracttype().isBlank()) { + if (re.getContracttype() != null && !re.getContracttype().isBlank()) { metadata.add(XmlSerializationUtils.mapQualifier("contracttype", re.getContracttype())); } - if (re.getFundingtree() != null & contexts != null) { + if (re.getFundingtree() != null && contexts != null) { metadata .addAll( re @@ -1091,12 +1134,12 @@ public class XmlRecordFactory implements Serializable { .add( XmlSerializationUtils.mapQualifier("accessright", instance.getAccessright())); } - if (instance.getCollectedfrom() != null) { + if (instance.getCollectedfrom() != null && kvNotBlank(instance.getCollectedfrom())) { fields .add( XmlSerializationUtils.mapKeyValue("collectedfrom", instance.getCollectedfrom())); } - if (instance.getHostedby() != null) { + if (instance.getHostedby() != null && kvNotBlank(instance.getHostedby())) { fields.add(XmlSerializationUtils.mapKeyValue("hostedby", instance.getHostedby())); } if (instance.getDateofacceptance() != null diff --git a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/input_params_prepare_relations.json b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/input_params_prepare_relations.json index bfb248d01..71b2becc4 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/input_params_prepare_relations.json +++ b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/input_params_prepare_relations.json @@ -21,6 +21,18 @@ "paramName": "rp", "paramLongName": "relPartitions", "paramDescription": "number or partitions for the relations Dataset", - "paramRequired": true + "paramRequired": false + }, + { + "paramName": "rf", + "paramLongName": "relationFilter", + "paramDescription": "filter applied reading relations (by relClass)", + "paramRequired": false + }, + { + "paramName": "mr", + "paramLongName": "maxRelations", + "paramDescription": "maximum number of relations allowed for a each entity", + "paramRequired": false } ] diff --git a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/oozie_app/workflow.xml index 298ac7589..6983ecf53 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/oozie_app/workflow.xml @@ -9,6 +9,30 @@ isLookupUrl URL for the isLookup service + + relPartitions + number or partitions for the relations Dataset + + + relationFilter + filter applied reading relations (by relClass) + + + maxRelations + maximum number of relations allowed for a each entity + + + otherDsTypeId + mapping used to populate datasourceTypeUi field + + + format + metadata format name (DMF|TMF) + + + batchSize + number of records to be included in each indexing request + sparkDriverMemoryForJoining @@ -56,6 +80,10 @@ spark2EventLogDir spark 2.* event log dir location + + sparkNetworkTimeout + configures spark.network.timeout + @@ -69,12 +97,16 @@ - + - + - ${wf:conf('reuseRecords') eq false} - ${wf:conf('reuseRecords') eq true} + ${wf:conf('resumeFrom') eq 'prepare_relations'} + ${wf:conf('resumeFrom') eq 'fork_join_related_entities'} + ${wf:conf('resumeFrom') eq 'join_all_entities'} + ${wf:conf('resumeFrom') eq 'adjancency_lists'} + ${wf:conf('resumeFrom') eq 'convert_to_xml'} + ${wf:conf('resumeFrom') eq 'to_solr_index'} @@ -309,7 +341,6 @@ - yarn @@ -419,4 +450,5 @@ + \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-update/pom.xml b/dhp-workflows/dhp-stats-update/pom.xml index 06408937b..d6ec4e6ab 100644 --- a/dhp-workflows/dhp-stats-update/pom.xml +++ b/dhp-workflows/dhp-stats-update/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.1-SNAPSHOT + 1.2.2-SNAPSHOT 4.0.0 dhp-stats-update diff --git a/dhp-workflows/dhp-worfklow-profiles/pom.xml b/dhp-workflows/dhp-worfklow-profiles/pom.xml index 5f99cdc8d..cb20db57e 100644 --- a/dhp-workflows/dhp-worfklow-profiles/pom.xml +++ b/dhp-workflows/dhp-worfklow-profiles/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.1-SNAPSHOT + 1.2.2-SNAPSHOT 4.0.0 diff --git a/dhp-workflows/pom.xml b/dhp-workflows/pom.xml index 271c66939..cf9753da4 100644 --- a/dhp-workflows/pom.xml +++ b/dhp-workflows/pom.xml @@ -6,7 +6,7 @@ eu.dnetlib.dhp dhp - 1.2.1-SNAPSHOT + 1.2.2-SNAPSHOT ../ diff --git a/pom.xml b/pom.xml index 419de3540..e0ee18900 100644 --- a/pom.xml +++ b/pom.xml @@ -3,7 +3,7 @@ 4.0.0 eu.dnetlib.dhp dhp - 1.2.1-SNAPSHOT + 1.2.2-SNAPSHOT pom