From 85ca5622d45569683191713e0d0e06dfcd3f3fbf Mon Sep 17 00:00:00 2001 From: "michele.artini" Date: Tue, 19 May 2020 16:17:35 +0200 Subject: [PATCH 1/5] partial implementation of generation of simple events --- .../eu/dnetlib/dhp/broker/model/Topic.java | 63 ++++++++++------ .../broker/oa/GenerateEventsApplication.java | 74 +++++++++++++++++-- .../EnrichMissingPublicationDate.java | 10 ++- .../dhp/broker/oa/matchers/UpdateMatcher.java | 10 ++- 4 files changed, 120 insertions(+), 37 deletions(-) diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/model/Topic.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/model/Topic.java index 29f6cbe3a..98088dd0a 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/model/Topic.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/model/Topic.java @@ -4,31 +4,48 @@ package eu.dnetlib.dhp.broker.model; public enum Topic { // ENRICHMENT MISSING - ENRICH_MISSING_OA_VERSION("ENRICH/MISSING/OPENACCESS_VERSION"), ENRICH_MISSING_ABSTRACT( - "ENRICH/MISSING/ABSTRACT"), ENRICH_MISSING_PUBLICATION_DATE( - "ENRICH/MISSING/PUBLICATION_DATE"), ENRICH_MISSING_PID( - "ENRICH/MISSING/PID"), ENRICH_MISSING_PROJECT("ENRICH/MISSING/PROJECT"), ENRICH_MISSING_SOFTWARE( - "ENRICH/MISSING/SOFTWARE"), ENRICH_MISSING_SUBJECT_MESHEUROPMC( - "ENRICH/MISSING/SUBJECT/MESHEUROPMC"), ENRICH_MISSING_SUBJECT_ARXIV( - "ENRICH/MISSING/SUBJECT/ARXIV"), ENRICH_MISSING_SUBJECT_JEL( - "ENRICH/MISSING/SUBJECT/JEL"), ENRICH_MISSING_SUBJECT_DDC( - "ENRICH/MISSING/SUBJECT/DDC"), ENRICH_MISSING_SUBJECT_ACM( - "ENRICH/MISSING/SUBJECT/ACM"), ENRICH_MISSING_SUBJECT_RVK( - "ENRICH/MISSING/SUBJECT/RVK"), ENRICH_MISSING_AUTHOR_ORCID( - "ENRICH/MISSING/AUTHOR/ORCID"), + ENRICH_MISSING_OA_VERSION("ENRICH/MISSING/OPENACCESS_VERSION"), + ENRICH_MISSING_ABSTRACT("ENRICH/MISSING/ABSTRACT"), + ENRICH_MISSING_PUBLICATION_DATE("ENRICH/MISSING/PUBLICATION_DATE"), + ENRICH_MISSING_PID("ENRICH/MISSING/PID"), + ENRICH_MISSING_PROJECT("ENRICH/MISSING/PROJECT"), + ENRICH_MISSING_SOFTWARE("ENRICH/MISSING/SOFTWARE"), + ENRICH_MISSING_SUBJECT_MESHEUROPMC("ENRICH/MISSING/SUBJECT/MESHEUROPMC"), + ENRICH_MISSING_SUBJECT_ARXIV("ENRICH/MISSING/SUBJECT/ARXIV"), + ENRICH_MISSING_SUBJECT_JEL("ENRICH/MISSING/SUBJECT/JEL"), + ENRICH_MISSING_SUBJECT_DDC("ENRICH/MISSING/SUBJECT/DDC"), + ENRICH_MISSING_SUBJECT_ACM("ENRICH/MISSING/SUBJECT/ACM"), + ENRICH_MISSING_SUBJECT_RVK("ENRICH/MISSING/SUBJECT/RVK"), + ENRICH_MISSING_AUTHOR_ORCID("ENRICH/MISSING/AUTHOR/ORCID"), // ENRICHMENT MORE - ENRICH_MORE_PID("ENRICH/MORE/PID"), ENRICH_MORE_OA_VERSION("ENRICH/MORE/OPENACCESS_VERSION"), ENRICH_MORE_ABSTRACT( - "ENRICH/MORE/ABSTRACT"), ENRICH_MORE_PUBLICATION_DATE("ENRICH/MORE/PUBLICATION_DATE"), ENRICH_MORE_PROJECT( - "ENRICH/MORE/PROJECT"), ENRICH_MORE_SUBJECT_MESHEUROPMC( - "ENRICH/MORE/SUBJECT/MESHEUROPMC"), ENRICH_MORE_SUBJECT_ARXIV( - "ENRICH/MORE/SUBJECT/ARXIV"), ENRICH_MORE_SUBJECT_JEL( - "ENRICH/MORE/SUBJECT/JEL"), ENRICH_MORE_SUBJECT_DDC( - "ENRICH/MORE/SUBJECT/DDC"), ENRICH_MORE_SUBJECT_ACM( - "ENRICH/MORE/SUBJECT/ACM"), ENRICH_MORE_SUBJECT_RVK("ENRICH/MORE/SUBJECT/RVK"), + ENRICH_MORE_PID("ENRICH/MORE/PID"), + ENRICH_MORE_OA_VERSION("ENRICH/MORE/OPENACCESS_VERSION"), + ENRICH_MORE_ABSTRACT("ENRICH/MORE/ABSTRACT"), + ENRICH_MORE_PUBLICATION_DATE("ENRICH/MORE/PUBLICATION_DATE"), + ENRICH_MORE_PROJECT("ENRICH/MORE/PROJECT"), + ENRICH_MORE_SUBJECT_MESHEUROPMC("ENRICH/MORE/SUBJECT/MESHEUROPMC"), + ENRICH_MORE_SUBJECT_ARXIV("ENRICH/MORE/SUBJECT/ARXIV"), + ENRICH_MORE_SUBJECT_JEL("ENRICH/MORE/SUBJECT/JEL"), + ENRICH_MORE_SUBJECT_DDC("ENRICH/MORE/SUBJECT/DDC"), + ENRICH_MORE_SUBJECT_ACM("ENRICH/MORE/SUBJECT/ACM"), + ENRICH_MORE_SUBJECT_RVK("ENRICH/MORE/SUBJECT/RVK"), // ADDITION - ADD_BY_PROJECT("ADD/BY_PROJECT"); + ADD_BY_PROJECT("ADD/BY_PROJECT"), + + // OTHER RELS + ENRICH_MISSING_PUBLICATION_IS_RELATED_TO("ENRICH/MISSING/PUBLICATION/IS_RELATED_TO"), + ENRICH_MISSING_PUBLICATION_REFERENCES("ENRICH/MISSING/PUBLICATION/REFERENCES"), + ENRICH_MISSING_PUBLICATION_IS_REFERENCED_BY("ENRICH/MISSING/PUBLICATION/IS_REFERENCED_BY"), + ENRICH_MISSING_PUBLICATION_IS_SUPPLEMENTED_TO("ENRICH/MISSING/PUBLICATION/IS_SUPPLEMENTED_TO"), + ENRICH_MISSING_PUBLICATION_IS_SUPPLEMENTED_BY("ENRICH/MISSING/PUBLICATION/IS_SUPPLEMENTED_BY"), + + ENRICH_MISSING_DATASET_IS_RELATED_TO("ENRICH/MISSING/DATASET/IS_RELATED_TO"), + ENRICH_MISSING_DATASET_REFERENCES("ENRICH/MISSING/DATASET/REFERENCES"), + ENRICH_MISSING_DATASET_IS_REFERENCED_BY("ENRICH/MISSING/DATASET/IS_REFERENCED_BY"), + ENRICH_MISSING_DATASET_IS_SUPPLEMENTED_TO("ENRICH/MISSING/DATASET/IS_SUPPLEMENTED_TO"), + ENRICH_MISSING_DATASET_IS_SUPPLEMENTED_BY("ENRICH/MISSING/DATASET/IS_SUPPLEMENTED_BY"); Topic(final String path) { this.path = path; @@ -42,9 +59,7 @@ public enum Topic { public static Topic fromPath(final String path) { for (final Topic t : Topic.values()) { - if (t.getPath().equals(path)) { - return t; - } + if (t.getPath().equals(path)) { return t; } } return null; } diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/GenerateEventsApplication.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/GenerateEventsApplication.java index 43ebd6dd8..5fdd10925 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/GenerateEventsApplication.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/GenerateEventsApplication.java @@ -9,11 +9,21 @@ import java.util.Optional; import java.util.stream.Collectors; import org.apache.commons.io.IOUtils; +import org.apache.hadoop.io.compress.GzipCodec; import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.api.java.function.MapFunction; +import org.apache.spark.sql.Column; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.Row; import org.apache.spark.sql.SparkSession; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.fasterxml.jackson.databind.ObjectMapper; + import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.broker.model.Event; import eu.dnetlib.dhp.broker.model.EventFactory; @@ -30,7 +40,11 @@ import eu.dnetlib.dhp.broker.oa.matchers.EnrichMoreSubject; import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher; import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; import eu.dnetlib.dhp.common.HdfsSupport; +import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct; +import eu.dnetlib.dhp.schema.oaf.Publication; +import eu.dnetlib.dhp.schema.oaf.Relation; import eu.dnetlib.dhp.schema.oaf.Result; +import eu.dnetlib.dhp.schema.oaf.Software; public class GenerateEventsApplication { @@ -47,12 +61,13 @@ public class GenerateEventsApplication { private static final UpdateMatcher enrichMorePid = new EnrichMorePid(); private static final UpdateMatcher enrichMoreSubject = new EnrichMoreSubject(); + public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + public static void main(final String[] args) throws Exception { final ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils - .toString( - GenerateEventsApplication.class - .getResourceAsStream("/eu/dnetlib/dhp/oa/graph/merge_claims_parameters.json"))); + .toString(GenerateEventsApplication.class + .getResourceAsStream("/eu/dnetlib/dhp/oa/graph/merge_claims_parameters.json"))); parser.parseArgument(args); final Boolean isSparkSessionManaged = Optional @@ -67,10 +82,23 @@ public class GenerateEventsApplication { final String eventsPath = parser.get("eventsPath"); log.info("eventsPath: {}", eventsPath); + final String resultClassName = parser.get("resultTableName"); + log.info("resultTableName: {}", resultClassName); + final SparkConf conf = new SparkConf(); + runWithSparkSession(conf, isSparkSessionManaged, spark -> { + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); removeOutputDir(spark, eventsPath); - generateEvents(spark, graphPath, eventsPath); + + final JavaRDD eventsRdd = sc.emptyRDD(); + + eventsRdd.union(generateSimpleEvents(spark, graphPath, Publication.class)); + eventsRdd.union(generateSimpleEvents(spark, graphPath, eu.dnetlib.dhp.schema.oaf.Dataset.class)); + eventsRdd.union(generateSimpleEvents(spark, graphPath, Software.class)); + eventsRdd.union(generateSimpleEvents(spark, graphPath, OtherResearchProduct.class)); + + eventsRdd.saveAsTextFile(eventsPath, GzipCodec.class); }); } @@ -79,11 +107,34 @@ public class GenerateEventsApplication { HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration()); } - private static void generateEvents(final SparkSession spark, final String graphPath, final String eventsPath) { - // TODO + private static JavaRDD generateSimpleEvents(final SparkSession spark, + final String graphPath, + final Class resultClazz) { + + final Dataset results = + readPath(spark, graphPath + "/" + resultClazz.getSimpleName().toLowerCase(), resultClazz) + .filter(r -> r.getDataInfo().getDeletedbyinference()); + + final Dataset rels = + readPath(spark, graphPath + "/relation", Relation.class) + .filter(r -> r.getRelClass().equals("TODO")); // TODO mergedIN + + final Column c = null; // TODO + + final Dataset aa = results.joinWith(rels, results.col("id").equalTo(rels.col("source")), "inner") + .groupBy(rels.col("target")) + .agg(c) + .filter(x -> x.size() > 1) + // generateSimpleEvents(...) + // flatMap() + // toRdd() + ; + + return null; + } - private List generateEvents(final Result... children) { + private List generateSimpleEvents(final Result... children) { final List> list = new ArrayList<>(); for (final Result target : children) { @@ -102,4 +153,13 @@ public class GenerateEventsApplication { return list.stream().map(EventFactory::newBrokerEvent).collect(Collectors.toList()); } + public static Dataset readPath( + final SparkSession spark, + final String inputPath, + final Class clazz) { + return spark + .read() + .textFile(inputPath) + .map((MapFunction) value -> OBJECT_MAPPER.readValue(value, clazz), Encoders.bean(clazz)); + } } diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPublicationDate.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPublicationDate.java index e9ec082c4..372a4e4c9 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPublicationDate.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPublicationDate.java @@ -1,6 +1,7 @@ package eu.dnetlib.dhp.broker.oa.matchers; +import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -16,12 +17,15 @@ public class EnrichMissingPublicationDate extends UpdateMatcher { @Override protected List> findUpdates(final Result source, final Result target) { - // return Arrays.asList(new EnrichMissingAbstract("xxxxxxx", 0.9f)); - return Arrays.asList(); + if (isMissing(target.getDateofacceptance()) && !isMissing(source.getDateofacceptance())) { + return Arrays.asList(generateUpdateInfo(source.getDateofacceptance().getValue(), source, target)); + } + return new ArrayList<>(); } @Override - public UpdateInfo generateUpdateInfo(final String highlightValue, final Result source, + public UpdateInfo generateUpdateInfo(final String highlightValue, + final Result source, final Result target) { return new UpdateInfo<>( Topic.ENRICH_MISSING_PUBLICATION_DATE, diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcher.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcher.java index b8b6132cd..d91b03200 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcher.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcher.java @@ -30,8 +30,7 @@ public abstract class UpdateMatcher { if (source != res) { for (final UpdateInfo info : findUpdates(source, res)) { final String s = DigestUtils.md5Hex(info.getHighlightValueAsString()); - if (!infoMap.containsKey(s) || infoMap.get(s).getTrust() < info.getTrust()) { - } else { + if (!infoMap.containsKey(s) || infoMap.get(s).getTrust() < info.getTrust()) {} else { infoMap.put(s, info); } } @@ -54,11 +53,16 @@ public abstract class UpdateMatcher { protected abstract List> findUpdates(Result source, Result target); - protected abstract UpdateInfo generateUpdateInfo(final T highlightValue, final Result source, + protected abstract UpdateInfo generateUpdateInfo(final T highlightValue, + final Result source, final Result target); protected static boolean isMissing(final List> list) { return list == null || list.isEmpty() || StringUtils.isBlank(list.get(0).getValue()); } + protected boolean isMissing(final Field field) { + return field == null || StringUtils.isBlank(field.getValue()); + } + } From b3bcbb3129fb26b3222e85cbfc7c421ab6fbeaa3 Mon Sep 17 00:00:00 2001 From: "michele.artini" Date: Thu, 21 May 2020 08:41:32 +0200 Subject: [PATCH 2/5] resolve name of organization countries --- .../eu/dnetlib/dhp/oa/graph/sql/queryOrganizations.sql | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryOrganizations.sql b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryOrganizations.sql index aeb04aff9..4dbd1c429 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryOrganizations.sql +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryOrganizations.sql @@ -22,12 +22,13 @@ SELECT '' AS inferenceprovenance, d.id AS collectedfromid, d.officialname AS collectedfromname, - o.country || '@@@' || o.country || '@@@dnet:countries@@@dnet:countries' AS country, + o.country || '@@@' || cntr.name || '@@@dnet:countries@@@dnet:countries' AS country, 'sysimport:crosswalk:entityregistry@@@sysimport:crosswalk:entityregistry@@@dnet:provenance_actions@@@dnet:provenance_actions' AS provenanceaction, - ARRAY[]::text[] AS pid + FROM dsm_organizations o LEFT OUTER JOIN dsm_datasources d ON (d.id = o.collectedfrom) + LEFT OUTER JOIN class cntr ON (cntr.code = o.country) From e43d4d7778851846bb0f4f361996682d2c3056f4 Mon Sep 17 00:00:00 2001 From: "michele.artini" Date: Thu, 21 May 2020 11:08:07 +0200 Subject: [PATCH 3/5] added a coalesce in sql query --- .../eu/dnetlib/dhp/oa/graph/sql/queryOrganizations.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryOrganizations.sql b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryOrganizations.sql index 4dbd1c429..d13bd4342 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryOrganizations.sql +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryOrganizations.sql @@ -22,7 +22,7 @@ SELECT '' AS inferenceprovenance, d.id AS collectedfromid, d.officialname AS collectedfromname, - o.country || '@@@' || cntr.name || '@@@dnet:countries@@@dnet:countries' AS country, + o.country || '@@@' || COALESCE(cntr.name,o.country) || '@@@dnet:countries@@@dnet:countries' AS country, 'sysimport:crosswalk:entityregistry@@@sysimport:crosswalk:entityregistry@@@dnet:provenance_actions@@@dnet:provenance_actions' AS provenanceaction, ARRAY[]::text[] AS pid From 8bbd1d0501ada82c6a037e0379c71006723544ab Mon Sep 17 00:00:00 2001 From: miconis Date: Thu, 21 May 2020 11:52:14 +0200 Subject: [PATCH 4/5] reimplementation of the author merging in deduprecord creation. implementation of the test class. --- .../eu/dnetlib/dhp/oa/dedup/AuthorMerger.java | 159 ++++++++++++++++++ .../dhp/oa/dedup/DedupRecordFactory.java | 23 +-- .../eu/dnetlib/dhp/oa/dedup/DedupUtility.java | 112 ------------ .../dhp/oa/dedup/EntityMergerTest.java | 138 +++++++++++++++ .../dnetlib/dhp/oa/dedup/MergeAuthorTest.java | 54 ------ .../dhp/dedup/json/publication_merge.json | 3 + 6 files changed, 313 insertions(+), 176 deletions(-) create mode 100644 dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/AuthorMerger.java create mode 100644 dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/EntityMergerTest.java delete mode 100644 dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/MergeAuthorTest.java create mode 100644 dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/json/publication_merge.json diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/AuthorMerger.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/AuthorMerger.java new file mode 100644 index 000000000..108f4a4be --- /dev/null +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/AuthorMerger.java @@ -0,0 +1,159 @@ +package eu.dnetlib.dhp.oa.dedup; + +import com.wcohen.ss.JaroWinkler; +import eu.dnetlib.dhp.schema.oaf.Author; +import eu.dnetlib.dhp.schema.oaf.StructuredProperty; +import eu.dnetlib.pace.model.Person; +import org.apache.commons.lang3.StringUtils; +import scala.Tuple2; + +import java.text.Normalizer; +import java.util.*; +import java.util.stream.Collectors; + +public class AuthorMerger { + + private static final Double THRESHOLD = 0.95; + + public static List merge(List> authors){ + + authors.sort(new Comparator>() { + @Override + public int compare(List o1, List o2) { + return -Integer.compare(countAuthorsPids(o1), countAuthorsPids(o2)); + } + }); + + List author = new ArrayList<>(); + + for(List a : authors){ + author = mergeAuthor(author, a); + } + + return author; + + } + + public static List mergeAuthor(final List a, final List b) { + int pa = countAuthorsPids(a); + int pb = countAuthorsPids(b); + List base, enrich; + int sa = authorsSize(a); + int sb = authorsSize(b); + + if (pa == pb) { + base = sa > sb ? a : b; + enrich = sa > sb ? b : a; + } else { + base = pa > pb ? a : b; + enrich = pa > pb ? b : a; + } + enrichPidFromList(base, enrich); + return base; + } + + private static void enrichPidFromList(List base, List enrich) { + if (base == null || enrich == null) + return; + final Map basePidAuthorMap = base + .stream() + .filter(a -> a.getPid() != null && a.getPid().size() > 0) + .flatMap( + a -> a + .getPid() + .stream() + .map(p -> new Tuple2<>(pidToComparableString(p), a))) + .collect(Collectors.toMap(Tuple2::_1, Tuple2::_2, (x1, x2) -> x1)); + + final List> pidToEnrich = enrich + .stream() + .filter(a -> a.getPid() != null && a.getPid().size() > 0) + .flatMap( + a -> a + .getPid() + .stream() + .filter(p -> !basePidAuthorMap.containsKey(pidToComparableString(p))) + .map(p -> new Tuple2<>(p, a))) + .collect(Collectors.toList()); + + pidToEnrich + .forEach( + a -> { + Optional> simAuthor = base + .stream() + .map(ba -> new Tuple2<>(sim(ba, a._2()), ba)) + .max(Comparator.comparing(Tuple2::_1)); + if (simAuthor.isPresent() && simAuthor.get()._1() > THRESHOLD) { + Author r = simAuthor.get()._2(); + if (r.getPid() == null) { + r.setPid(new ArrayList<>()); + } + r.getPid().add(a._1()); + } + }); + } + + public static String pidToComparableString(StructuredProperty pid){ + return (pid.getQualifier()!=null? pid.getQualifier().getClassid()!=null?pid.getQualifier().getClassid().toLowerCase():"":"") + (pid.getValue()!=null? pid.getValue().toLowerCase():""); + } + + public static int countAuthorsPids(List authors) { + if (authors == null) + return 0; + + return (int) authors.stream().filter(AuthorMerger::hasPid).count(); + } + + private static int authorsSize(List authors) { + if (authors == null) + return 0; + return authors.size(); + } + + private static Double sim(Author a, Author b) { + + final Person pa = parse(a); + final Person pb = parse(b); + + if (pa.isAccurate() & pb.isAccurate()) { + return new JaroWinkler() + .score(normalize(pa.getSurnameString()), normalize(pb.getSurnameString())); + } else { + return new JaroWinkler() + .score(normalize(pa.getNormalisedFullname()), normalize(pb.getNormalisedFullname())); + } + } + + private static boolean hasPid(Author a) { + if (a == null || a.getPid() == null || a.getPid().size() == 0) + return false; + return a.getPid().stream().anyMatch(p -> p != null && StringUtils.isNotBlank(p.getValue())); + } + + private static Person parse(Author author) { + if (StringUtils.isNotBlank(author.getSurname())) { + return new Person(author.getSurname() + ", " + author.getName(), false); + } else { + return new Person(author.getFullname(), false); + } + } + + private static String normalize(final String s) { + return nfd(s) + .toLowerCase() + // do not compact the regexes in a single expression, would cause StackOverflowError + // in case + // of large input strings + .replaceAll("(\\W)+", " ") + .replaceAll("(\\p{InCombiningDiacriticalMarks})+", " ") + .replaceAll("(\\p{Punct})+", " ") + .replaceAll("(\\d)+", " ") + .replaceAll("(\\n)+", " ") + .trim(); + } + + private static String nfd(final String s) { + return Normalizer.normalize(s, Normalizer.Form.NFD); + } + +} diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java index fa06424d7..eed783e53 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java @@ -1,8 +1,9 @@ - package eu.dnetlib.dhp.oa.dedup; +import java.io.Serializable; import java.util.Collection; import java.util.Iterator; +import java.util.List; import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.api.java.function.MapGroupsFunction; @@ -67,16 +68,18 @@ public class DedupRecordFactory { (MapFunction, String>) entity -> entity._1(), Encoders.STRING()) .mapGroups( (MapGroupsFunction, T>) (key, - values) -> entityMerger(key, values, ts, dataInfo), + values) -> entityMerger(key, values, ts, dataInfo, clazz), Encoders.bean(clazz)); } - private static T entityMerger( - String id, Iterator> entities, long ts, DataInfo dataInfo) { + public static T entityMerger( + String id, Iterator> entities, long ts, DataInfo dataInfo, Class clazz) throws IllegalAccessException, InstantiationException { - T entity = entities.next()._2(); + T entity = clazz.newInstance(); final Collection dates = Lists.newArrayList(); + final List> authors = Lists.newArrayList(); + entities .forEachRemaining( t -> { @@ -84,17 +87,17 @@ public class DedupRecordFactory { entity.mergeFrom(duplicate); if (ModelSupport.isSubClass(duplicate, Result.class)) { Result r1 = (Result) duplicate; - Result er = (Result) entity; - er.setAuthor(DedupUtility.mergeAuthor(er.getAuthor(), r1.getAuthor())); - - if (r1.getDateofacceptance() != null) { + if (r1.getAuthor() != null && r1.getAuthor().size()>0) + authors.add(r1.getAuthor()); + if (r1.getDateofacceptance() != null) dates.add(r1.getDateofacceptance().getValue()); - } } }); + //set authors and date if (ModelSupport.isSubClass(entity, Result.class)) { ((Result) entity).setDateofacceptance(DatePicker.pick(dates)); + ((Result) entity).setAuthor(AuthorMerger.merge(authors)); } entity.setId(id); diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupUtility.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupUtility.java index d3ae8ee4f..222794d64 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupUtility.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupUtility.java @@ -32,7 +32,6 @@ import eu.dnetlib.pace.model.Person; import scala.Tuple2; public class DedupUtility { - private static final Double THRESHOLD = 0.95; public static Map constructAccumulator( final DedupConfig dedupConf, final SparkContext context) { @@ -82,58 +81,6 @@ public class DedupUtility { } } - public static List mergeAuthor(final List a, final List b) { - int pa = countAuthorsPids(a); - int pb = countAuthorsPids(b); - List base, enrich; - int sa = authorsSize(a); - int sb = authorsSize(b); - - if (pa == pb) { - base = sa > sb ? a : b; - enrich = sa > sb ? b : a; - } else { - base = pa > pb ? a : b; - enrich = pa > pb ? b : a; - } - enrichPidFromList(base, enrich); - return base; - } - - private static void enrichPidFromList(List base, List enrich) { - if (base == null || enrich == null) - return; - final Map basePidAuthorMap = base - .stream() - .filter(a -> a.getPid() != null && a.getPid().size() > 0) - .flatMap(a -> a.getPid().stream().map(p -> new Tuple2<>(p.toComparableString(), a))) - .collect(Collectors.toMap(Tuple2::_1, Tuple2::_2, (x1, x2) -> x1)); - - final List> pidToEnrich = enrich - .stream() - .filter(a -> a.getPid() != null && a.getPid().size() > 0) - .flatMap( - a -> a - .getPid() - .stream() - .filter(p -> !basePidAuthorMap.containsKey(p.toComparableString())) - .map(p -> new Tuple2<>(p, a))) - .collect(Collectors.toList()); - - pidToEnrich - .forEach( - a -> { - Optional> simAuhtor = base - .stream() - .map(ba -> new Tuple2<>(sim(ba, a._2()), ba)) - .max(Comparator.comparing(Tuple2::_1)); - if (simAuhtor.isPresent() && simAuhtor.get()._1() > THRESHOLD) { - Author r = simAuhtor.get()._2(); - r.getPid().add(a._1()); - } - }); - } - public static String createDedupRecordPath( final String basePath, final String actionSetId, final String entityType) { return String.format("%s/%s/%s_deduprecord", basePath, actionSetId, entityType); @@ -153,65 +100,6 @@ public class DedupUtility { return String.format("%s/%s/%s_mergerel", basePath, actionSetId, entityType); } - private static Double sim(Author a, Author b) { - - final Person pa = parse(a); - final Person pb = parse(b); - - if (pa.isAccurate() & pb.isAccurate()) { - return new JaroWinkler() - .score(normalize(pa.getSurnameString()), normalize(pb.getSurnameString())); - } else { - return new JaroWinkler() - .score(normalize(pa.getNormalisedFullname()), normalize(pb.getNormalisedFullname())); - } - } - - private static String normalize(final String s) { - return nfd(s) - .toLowerCase() - // do not compact the regexes in a single expression, would cause StackOverflowError - // in case - // of large input strings - .replaceAll("(\\W)+", " ") - .replaceAll("(\\p{InCombiningDiacriticalMarks})+", " ") - .replaceAll("(\\p{Punct})+", " ") - .replaceAll("(\\d)+", " ") - .replaceAll("(\\n)+", " ") - .trim(); - } - - private static String nfd(final String s) { - return Normalizer.normalize(s, Normalizer.Form.NFD); - } - - private static Person parse(Author author) { - if (StringUtils.isNotBlank(author.getSurname())) { - return new Person(author.getSurname() + ", " + author.getName(), false); - } else { - return new Person(author.getFullname(), false); - } - } - - private static int countAuthorsPids(List authors) { - if (authors == null) - return 0; - - return (int) authors.stream().filter(DedupUtility::hasPid).count(); - } - - private static int authorsSize(List authors) { - if (authors == null) - return 0; - return authors.size(); - } - - private static boolean hasPid(Author a) { - if (a == null || a.getPid() == null || a.getPid().size() == 0) - return false; - return a.getPid().stream().anyMatch(p -> p != null && StringUtils.isNotBlank(p.getValue())); - } - public static List getConfigurations(String isLookUpUrl, String orchestrator) throws ISLookUpException, DocumentException { final ISLookUpService isLookUpService = ISLookupClientFactory.getLookUpService(isLookUpUrl); diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/EntityMergerTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/EntityMergerTest.java new file mode 100644 index 000000000..526dd73bb --- /dev/null +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/EntityMergerTest.java @@ -0,0 +1,138 @@ +package eu.dnetlib.dhp.oa.dedup; + +import java.io.BufferedReader; +import java.io.FileReader; +import java.io.IOException; +import java.io.Serializable; +import java.nio.file.Paths; +import java.util.*; + +import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.pace.util.MapDocumentUtil; +import org.codehaus.jackson.map.ObjectMapper; +import org.junit.jupiter.api.BeforeEach; + +import org.junit.jupiter.api.Test; +import scala.Tuple2; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +public class EntityMergerTest implements Serializable { + + List> publications; + + String testEntityBasePath; + DataInfo dataInfo; + String dedupId = "dedup_id"; + Publication pub_top; + + @BeforeEach + public void setUp() throws Exception { + + testEntityBasePath = Paths + .get(SparkDedupTest.class.getResource("/eu/dnetlib/dhp/dedup/json").toURI()) + .toFile() + .getAbsolutePath(); + + publications = readSample(testEntityBasePath + "/publication_merge.json", Publication.class); + + pub_top = getTopPub(publications); + + dataInfo = setDI(); + + } + + @Test + public void publicationMergerTest() throws InstantiationException, IllegalAccessException { + + Publication pub_merged = DedupRecordFactory.entityMerger(dedupId, publications.iterator(), 0, dataInfo, Publication.class); + + assertEquals(dedupId, pub_merged.getId()); + + assertEquals(pub_merged.getJournal(), pub_top.getJournal()); + assertEquals(pub_merged.getBestaccessright(), pub_top.getBestaccessright()); + assertEquals(pub_merged.getResulttype(), pub_top.getResulttype()); + assertEquals(pub_merged.getLanguage(), pub_merged.getLanguage()); + assertEquals(pub_merged.getPublisher(), pub_top.getPublisher()); + assertEquals(pub_merged.getEmbargoenddate(), pub_top.getEmbargoenddate()); + assertEquals(pub_merged.getResourcetype().getClassid(), "0004"); + assertEquals(pub_merged.getDateoftransformation(), pub_top.getDateoftransformation()); + assertEquals(pub_merged.getOaiprovenance(), pub_top.getOaiprovenance()); + assertEquals(pub_merged.getDateofcollection(), pub_top.getDateofcollection()); + assertEquals(pub_merged.getInstance().size(),3); + assertEquals(pub_merged.getCountry().size(), 2); + assertEquals(pub_merged.getSubject().size(), 0); + assertEquals(pub_merged.getTitle().size(), 2); + assertEquals(pub_merged.getRelevantdate().size(),0); + assertEquals(pub_merged.getDescription().size(),0); + assertEquals(pub_merged.getSource().size(),0); + assertEquals(pub_merged.getFulltext().size(),0); + assertEquals(pub_merged.getFormat().size(),0); + assertEquals(pub_merged.getContributor().size(),0); + assertEquals(pub_merged.getCoverage().size(),0); + assertEquals(pub_merged.getContext().size(),0); + assertEquals(pub_merged.getExternalReference().size(),0); + assertEquals(pub_merged.getOriginalId().size(),3); + assertEquals(pub_merged.getCollectedfrom().size(),3); + assertEquals(pub_merged.getPid().size(),1); + assertEquals(pub_merged.getExtraInfo().size(),0); + + //verify datainfo + assertEquals(pub_merged.getDataInfo(), dataInfo); + + //verify datepicker + assertEquals(pub_merged.getDateofacceptance().getValue(), "2018-09-30"); + + //verify authors + assertEquals(pub_merged.getAuthor().size(), 9); + assertEquals(AuthorMerger.countAuthorsPids(pub_merged.getAuthor()), 4); + } + + public DataInfo setDI(){ + DataInfo dataInfo = new DataInfo(); + dataInfo.setTrust("0.9"); + dataInfo.setDeletedbyinference(false); + dataInfo.setInferenceprovenance("testing"); + dataInfo.setInferred(true); + return dataInfo; + } + + public Publication getTopPub(List> publications){ + + Double maxTrust = 0.0; + Publication maxPub = new Publication(); + for (Tuple2 publication : publications) { + Double pubTrust = Double.parseDouble(publication._2().getDataInfo().getTrust()); + if(pubTrust > maxTrust){ + maxTrust = pubTrust; + maxPub = publication._2(); + } + } + return maxPub; + } + + public List> readSample(String path, Class clazz) { + List> res = new ArrayList<>(); + BufferedReader reader; + try { + reader = new BufferedReader(new FileReader(path)); + String line = reader.readLine(); + while (line != null) { + res.add( + new Tuple2<>( + MapDocumentUtil.getJPathString("$.id", line), + new ObjectMapper().readValue(line, clazz)) + ); + // read next line + line = reader.readLine(); + } + reader.close(); + } catch (IOException e) { + e.printStackTrace(); + } + + return res; + } + + +} diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/MergeAuthorTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/MergeAuthorTest.java deleted file mode 100644 index a217a2657..000000000 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/MergeAuthorTest.java +++ /dev/null @@ -1,54 +0,0 @@ - -package eu.dnetlib.dhp.oa.dedup; - -import java.io.IOException; -import java.util.Arrays; -import java.util.List; -import java.util.stream.Collectors; - -import org.apache.commons.io.IOUtils; -import org.codehaus.jackson.map.ObjectMapper; -import org.junit.jupiter.api.BeforeEach; - -import eu.dnetlib.dhp.schema.oaf.Publication; - -public class MergeAuthorTest { - - private List publicationsToMerge; - private final ObjectMapper mapper = new ObjectMapper(); - - @BeforeEach - public void setUp() throws Exception { - final String json = IOUtils - .toString( - this.getClass().getResourceAsStream("/eu/dnetlib/dhp/dedup/json/authors_merge.json")); - - publicationsToMerge = Arrays - .asList(json.split("\n")) - .stream() - .map( - s -> { - try { - return mapper.readValue(s, Publication.class); - } catch (IOException e) { - throw new RuntimeException(e); - } - }) - .collect(Collectors.toList()); - } - - // FIX ME Michele DB this tests doesn't work - // @Test - public void test() throws Exception { - Publication dedup = new Publication(); - - publicationsToMerge - .forEach( - p -> { - dedup.mergeFrom(p); - dedup.setAuthor(DedupUtility.mergeAuthor(dedup.getAuthor(), p.getAuthor())); - }); - - System.out.println(mapper.writeValueAsString(dedup)); - } -} diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/json/publication_merge.json b/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/json/publication_merge.json new file mode 100644 index 000000000..015f9294a --- /dev/null +++ b/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/json/publication_merge.json @@ -0,0 +1,3 @@ +{"context": [], "dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:datasetarchive", "classname": "sysimport:crosswalk:datasetarchive", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "dedup-similarity-result-levenstein", "invisible": false, "trust": "0.95"}, "resourcetype": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "pid": [], "contributor": [], "resulttype": {"classid": "publication", "classname": "publication", "schemename": "dnet:result_typologies", "schemeid": "dnet:result_typologies"}, "relevantdate": [], "collectedfrom": [{"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "Journal.fi", "key": "10|openaire____::6eef8049d0feedc089ee009abca55e35"}], "id": "50|a89337edbe55::4930db9e954866d70916cbfba9f81f97", "subject": [], "instance": [{"refereed": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "hostedby": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "Journal.fi", "key": "10|openaire____::6eef8049d0feedc089ee009abca55e35"}, "processingchargeamount": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "license": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "url": [], "distributionlocation": "", "processingchargecurrency": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "dateofacceptance": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "2016-01-01"}, "collectedfrom": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "Journal.fi", "key": "10|openaire____::6eef8049d0feedc089ee009abca55e35"}, "accessright": {"classid": "OPEN", "classname": "Open Access", "schemename": "dnet:access_modes", "schemeid": "dnet:access_modes"}, "instancetype": {"classid": "0001", "classname": "Article", "schemename": "dnet:dataCite_resource", "schemeid": "dnet:dataCite_resource"}}], "embargoenddate": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "lastupdatetimestamp": 0, "author": [{"surname": "Go\\u0308tz", "name": "Julia", "pid": [], "rank": 5, "affiliation": [], "fullname": "G\\u00f6tz, Julia"}, {"surname": "Wolff", "name": "Stephan", "pid": [], "rank": 6, "affiliation": [], "fullname": "Wolff, Stephan"}, {"surname": "Jansen", "name": "Olav", "pid": [], "rank": 7, "affiliation": [], "fullname": "Jansen, Olav"}, {"surname": "Dressler", "name": "Dirk", "pid": [{"qualifier": {"classid": "ORCID", "classname": "ORCID"}, "value": "0000-0000-0656-9999"},{"qualifier": {"classid": "id", "classname": "id"}, "value": "987654321"}], "rank": 8, "affiliation": [], "fullname": "Dressler, Dirk"}, {"surname": "Schneider", "name": "Susanne A.", "pid": [], "rank": 9, "affiliation": [], "fullname": "Schneider, Susanne A."}], "source": [], "dateofcollection": "2019-11-05T14:49:22.351Z", "fulltext": [], "dateoftransformation": "2019-11-05T16:10:58.988Z", "description": [], "format": [], "journal": {"issnPrinted": "1459-6067", "conferencedate": "", "conferenceplace": "", "name": "Agricultural and Food Science", "edition": "", "iss": "3", "sp": "", "vol": "27", "dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "issnOnline": "1795-1895", "ep": "", "issnLinking": ""}, "coverage": [], "publisher": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "language": {"classid": "eng", "classname": "English", "schemename": "dnet:languages", "schemeid": "dnet:languages"}, "bestaccessright": {"classid": "OPEN", "classname": "Open Access", "schemename": "dnet:access_modes", "schemeid": "dnet:access_modes"}, "country": [], "extraInfo": [], "originalId": [], "dateofacceptance": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "2018-09-30"}, "title": [{"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "qualifier": {"classid": "main title", "classname": "main title", "schemename": "dnet:dataCite_title", "schemeid": "dnet:dataCite_title"}, "value": "Altered brain activation in a reversal learning task unmasks adaptive changes in cognitive control in writer's cramp"}]} +{"context": [], "dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:repository", "classname": "sysimport:crosswalk:repository", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": true, "inferenceprovenance": "dedup-similarity-result-levenstein", "invisible": false, "trust": "0.9"}, "resourcetype": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "pid": [{"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "qualifier": {"classid": "doi", "classname": "doi", "schemename": "dnet:pid_types", "schemeid": "dnet:pid_types"}, "value": "10.1016/j.nicl.2015.11.006"}], "contributor": [], "resulttype": {"classid": "publication", "classname": "publication", "schemename": "dnet:result_typologies", "schemeid": "dnet:result_typologies"}, "relevantdate": [], "collectedfrom": [{"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "BASE (Open Access Aggregator)", "key": "10|openaire____::df45502607927471ecf8a6ae83683ff5"}], "id": "50|base_oa_____::0968af610a356656706657e4f234b340", "subject": [], "instance": [{"refereed": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "hostedby": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "NeuroImage: Clinical", "key": "10|doajarticles::0c0e74daa5d95504eade9c81ebbd5b8a"}, "processingchargeamount": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "license": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "http://creativecommons.org/licenses/by-nc-nd/4.0/"}, "url": ["http://dx.doi.org/10.1016/j.nicl.2015.11.006"], "distributionlocation": "", "processingchargecurrency": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "dateofacceptance": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "2016-01-01"}, "collectedfrom": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "BASE (Open Access Aggregator)", "key": "10|openaire____::df45502607927471ecf8a6ae83683ff5"}, "accessright": {"classid": "OPEN", "classname": "Open Access", "schemename": "dnet:access_modes", "schemeid": "dnet:access_modes"}, "instancetype": {"classid": "0001", "classname": "Article", "schemename": "dnet:publication_resource", "schemeid": "dnet:publication_resource"}}], "embargoenddate": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "lastupdatetimestamp": 0, "author": [{"surname": "Zeuner", "name": "Kirsten E.", "pid": [], "rank": 1, "affiliation": [], "fullname": "Zeuner, Kirsten E."}, {"surname": "Knutzen", "name": "Arne", "pid": [], "rank": 2, "affiliation": [], "fullname": "Knutzen, Arne"}, {"surname": "Granert", "name": "Oliver", "pid": [{"qualifier": {"classid": "ORCID", "classname": "ORCID"}, "value": "0000-0002-0656-1023"}], "rank": 3, "affiliation": [], "fullname": "Granert, Oliver"}, {"surname": "Sablowsky", "name": "Simone", "pid": [], "rank": 4, "affiliation": [], "fullname": "Sablowsky, Simone"}, {"surname": "Go\\u0308tz", "name": "Julia", "pid": [], "rank": 5, "affiliation": [], "fullname": "G\\u00f6tz, Julia"}, {"surname": "Wolff", "name": "Stephan", "pid": [], "rank": 6, "affiliation": [], "fullname": "Wolff, Stephan"}, {"surname": "Jansen", "name": "Olav", "pid": [], "rank": 7, "affiliation": [], "fullname": "Jansen, Olav"}, {"surname": "Dressler", "name": "Dirk", "pid": [], "rank": 8, "affiliation": [], "fullname": "Dressler, Dirk"}, {"surname": "Schneider", "name": "Susanne A.", "pid": [], "rank": 9, "affiliation": [], "fullname": "Schneider, Susanne A."}, {"surname": "Klein", "name": "Christine", "pid": [], "rank": 10, "affiliation": [], "fullname": "Klein, Christine"}, {"surname": "Deuschl", "name": "Gu\\u0308nther", "pid": [], "rank": 11, "affiliation": [], "fullname": "Deuschl, G\\u00fcnther"}, {"surname": "Eimeren", "name": "Thilo", "pid": [], "rank": 12, "affiliation": [], "fullname": "van Eimeren, Thilo"}, {"surname": "Witt", "name": "Karsten", "pid": [], "rank": 13, "affiliation": [], "fullname": "Witt, Karsten"}], "source": [], "dateofcollection": "2017-07-27T19:04:09.131Z", "fulltext": [], "dateoftransformation": "2019-01-23T10:15:19.582Z", "description": [], "format": [], "journal": {"issnPrinted": "2213-1582", "conferencedate": "", "conferenceplace": "", "name": "NeuroImage: Clinical", "edition": "", "iss": "", "sp": "63", "vol": "10", "dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "issnOnline": "", "ep": "70", "issnLinking": ""}, "coverage": [], "publisher": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "Elsevier BV"}, "language": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "bestaccessright": {"classid": "OPEN", "classname": "Open Access", "schemename": "dnet:access_modes", "schemeid": "dnet:access_modes"}, "country": [{"classid": "IT", "classname": "Italy", "schemeid": "dnet:countries", "schemename": "dnet:countries"}], "extraInfo": [], "originalId": ["10.1016/j.nicl.2015.11.006"], "dateofacceptance": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "2016-01-01"}, "title": [{"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "qualifier": {"classid": "main title", "classname": "main title", "schemename": "dnet:dataCite_title", "schemeid": "dnet:dataCite_title"}, "value": "Altered brain activation in a reversal learning task unmasks adaptive changes in cognitive control in writer's cramp"}]} +{"context": [], "dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:datasetarchive", "classname": "sysimport:crosswalk:datasetarchive", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": true, "inferenceprovenance": "dedup-similarity-result-levenstein", "invisible": false, "trust": "0.9"}, "resourcetype": {"classid": "0004", "classname": "Conference object", "schemename": "dnet:dataCite_resource", "schemeid": "dnet:dataCite_resource"}, "pid": [], "contributor": [], "resulttype": {"classid": "publication", "classname": "publication", "schemename": "dnet:result_typologies", "schemeid": "dnet:result_typologies"}, "relevantdate": [], "collectedfrom": [{"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "CRIS UNS (Current Research Information System University of Novi Sad)", "key": "10|CRIS_UNS____::f66f1bd369679b5b077dcdf006089556"}], "id": "50|CrisUnsNoviS::9f9d014eea45dab432cab636c4c9cf39", "subject": [], "instance": [{"refereed": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "hostedby": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "CRIS UNS (Current Research Information System University of Novi Sad)", "key": "10|CRIS_UNS____::f66f1bd369679b5b077dcdf006089556"}, "processingchargeamount": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "license": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "url": ["https://www.cris.uns.ac.rs/record.jsf?recordId=113444&source=OpenAIRE&language=en"], "distributionlocation": "", "processingchargecurrency": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "dateofacceptance": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "2019-01-01"}, "collectedfrom": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "CRIS UNS (Current Research Information System University of Novi Sad)", "key": "10|CRIS_UNS____::f66f1bd369679b5b077dcdf006089556"}, "accessright": {"classid": "UNKNOWN", "classname": "UNKNOWN", "schemename": "dnet:access_modes", "schemeid": "dnet:access_modes"}, "instancetype": {"classid": "0004", "classname": "Conference object", "schemename": "dnet:dataCite_resource", "schemeid": "dnet:dataCite_resource"}}], "embargoenddate": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "lastupdatetimestamp": 0, "author": [{"surname": "Zeuner", "name": "Kirsten E.", "pid": [], "rank": 1, "affiliation": [], "fullname": "Zeuner, Kirsten E."}, {"surname": "Knutzen", "name": "Arne", "pid": [], "rank": 2, "affiliation": [], "fullname": "Knutzen, Arne"}, {"surname": "Granert", "name": "Oliver", "pid": [{"qualifier": {"classid": "ORCID", "classname": "ORCID"}, "value": "0000-0002-0656-1023"}, {"qualifier": {"classid": "pubmed", "classname": "pubmed"}, "value": "pubmed.it"}], "rank": 3, "affiliation": [], "fullname": "Granert, Oliver"}, {"surname": "Sablowsky", "name": "Simone", "pid": [{"qualifier": {"classid": "id", "classname": "id"}, "value": "12345678"}], "rank": 4, "affiliation": [], "fullname": "Sablowsky, Simone"}, {"surname": "Go\\u0308tz", "name": "Julia", "pid": [], "rank": 5, "affiliation": [], "fullname": "G\\u00f6tz, Julia"}, {"surname": "Wolff", "name": "Stephan", "pid": [], "rank": 6, "affiliation": [], "fullname": "Wolff, Stephan"}, {"surname": "Jansen", "name": "Olav", "pid": [{"qualifier": {"classid": "ORCID", "classname": "ORCID"}, "value": "0000-0000-0656-1023"},{"qualifier": {"classid": "id", "classname": "id"}, "value": "987654321"}], "rank": 7, "affiliation": [], "fullname": "Jansen, Olav"}, {"surname": "Dressler", "name": "Dirk", "pid": [], "rank": 8, "affiliation": [], "fullname": "Dressler, Dirk"}, {"surname": "Schneider", "name": "Susanne A.", "pid": [], "rank": 9, "affiliation": [], "fullname": "Schneider, Susanne A."}], "source": [], "dateofcollection": "2020-03-10T15:05:38.685Z", "fulltext": [], "dateoftransformation": "2020-03-11T20:11:13.15Z", "description": [], "format": [], "journal": {"issnPrinted": "", "conferencedate": "", "conferenceplace": "", "name": "", "edition": "", "iss": "", "sp": "", "vol": "", "dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "issnOnline": "", "ep": "", "issnLinking": ""}, "coverage": [], "publisher": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "language": {"classid": "en", "classname": "en", "schemename": "dnet:languages", "schemeid": "dnet:languages"}, "bestaccessright": {"classid": "UNKNOWN", "classname": "not available", "schemename": "dnet:access_modes", "schemeid": "dnet:access_modes"}, "country": [{"classid": "FI", "classname": "Finland", "schemeid": "dnet:countries", "schemename": "dnet:countries"}], "extraInfo": [], "originalId": ["(BISIS)113444", "https://www.cris.uns.ac.rs/record.jsf?recordId=113444&source=OpenAIRE&language=en"], "dateofacceptance": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "2016-01-01"}, "title": [{"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "qualifier": {"classid": "main title", "classname": "main title", "schemename": "dnet:dataCite_title", "schemeid": "dnet:dataCite_title"}, "value": "Antichains of copies of ultrahomogeneous structures"}]} \ No newline at end of file From 3e345174790135695590409284747c42d6c9cbf3 Mon Sep 17 00:00:00 2001 From: "michele.artini" Date: Thu, 21 May 2020 16:47:53 +0200 Subject: [PATCH 5/5] partial implementation of events with rels --- .../eu/dnetlib/dhp/broker/model/Topic.java | 69 ++++---- .../broker/oa/GenerateEventsApplication.java | 161 +++++++++++++++--- .../oa/matchers/EnrichMissingAbstract.java | 5 +- .../oa/matchers/EnrichMissingAuthorOrcid.java | 5 +- .../EnrichMissingDatasetIsReferencedBy.java | 38 +++++ .../EnrichMissingDatasetIsRelatedTo.java | 38 +++++ .../EnrichMissingDatasetIsSupplementedBy.java | 38 +++++ .../EnrichMissingDatasetIsSupplementedTo.java | 38 +++++ .../EnrichMissingDatasetReferences.java | 38 +++++ .../oa/matchers/EnrichMissingOpenAccess.java | 2 +- .../broker/oa/matchers/EnrichMissingPid.java | 2 +- .../oa/matchers/EnrichMissingProject.java | 21 ++- .../EnrichMissingPublicationDate.java | 2 +- ...nrichMissingPublicationIsReferencedBy.java | 42 +++++ .../EnrichMissingPublicationIsRelatedTo.java | 42 +++++ ...ichMissingPublicationIsSupplementedBy.java | 42 +++++ ...ichMissingPublicationIsSupplementedTo.java | 42 +++++ .../EnrichMissingPublicationReferences.java | 42 +++++ .../oa/matchers/EnrichMissingSoftware.java | 41 +++++ .../oa/matchers/EnrichMissingSubject.java | 2 +- .../oa/matchers/EnrichMoreOpenAccess.java | 2 +- .../dhp/broker/oa/matchers/EnrichMorePid.java | 2 +- .../broker/oa/matchers/EnrichMoreProject.java | 39 +++++ .../oa/matchers/EnrichMoreSoftware.java | 41 +++++ .../broker/oa/matchers/EnrichMoreSubject.java | 2 +- .../dhp/broker/oa/matchers/UpdateMatcher.java | 16 +- .../dhp/broker/oa/util/BrokerConstants.java | 2 + 27 files changed, 729 insertions(+), 85 deletions(-) create mode 100644 dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingDatasetIsReferencedBy.java create mode 100644 dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingDatasetIsRelatedTo.java create mode 100644 dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingDatasetIsSupplementedBy.java create mode 100644 dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingDatasetIsSupplementedTo.java create mode 100644 dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingDatasetReferences.java create mode 100644 dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPublicationIsReferencedBy.java create mode 100644 dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPublicationIsRelatedTo.java create mode 100644 dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPublicationIsSupplementedBy.java create mode 100644 dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPublicationIsSupplementedTo.java create mode 100644 dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPublicationReferences.java create mode 100644 dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingSoftware.java create mode 100644 dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMoreProject.java create mode 100644 dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMoreSoftware.java diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/model/Topic.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/model/Topic.java index 98088dd0a..0716bd98d 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/model/Topic.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/model/Topic.java @@ -4,48 +4,45 @@ package eu.dnetlib.dhp.broker.model; public enum Topic { // ENRICHMENT MISSING - ENRICH_MISSING_OA_VERSION("ENRICH/MISSING/OPENACCESS_VERSION"), - ENRICH_MISSING_ABSTRACT("ENRICH/MISSING/ABSTRACT"), - ENRICH_MISSING_PUBLICATION_DATE("ENRICH/MISSING/PUBLICATION_DATE"), - ENRICH_MISSING_PID("ENRICH/MISSING/PID"), - ENRICH_MISSING_PROJECT("ENRICH/MISSING/PROJECT"), - ENRICH_MISSING_SOFTWARE("ENRICH/MISSING/SOFTWARE"), - ENRICH_MISSING_SUBJECT_MESHEUROPMC("ENRICH/MISSING/SUBJECT/MESHEUROPMC"), - ENRICH_MISSING_SUBJECT_ARXIV("ENRICH/MISSING/SUBJECT/ARXIV"), - ENRICH_MISSING_SUBJECT_JEL("ENRICH/MISSING/SUBJECT/JEL"), - ENRICH_MISSING_SUBJECT_DDC("ENRICH/MISSING/SUBJECT/DDC"), - ENRICH_MISSING_SUBJECT_ACM("ENRICH/MISSING/SUBJECT/ACM"), - ENRICH_MISSING_SUBJECT_RVK("ENRICH/MISSING/SUBJECT/RVK"), - ENRICH_MISSING_AUTHOR_ORCID("ENRICH/MISSING/AUTHOR/ORCID"), + ENRICH_MISSING_OA_VERSION("ENRICH/MISSING/OPENACCESS_VERSION"), ENRICH_MISSING_ABSTRACT( + "ENRICH/MISSING/ABSTRACT"), ENRICH_MISSING_PUBLICATION_DATE( + "ENRICH/MISSING/PUBLICATION_DATE"), ENRICH_MISSING_PID( + "ENRICH/MISSING/PID"), ENRICH_MISSING_PROJECT("ENRICH/MISSING/PROJECT"), ENRICH_MISSING_SOFTWARE( + "ENRICH/MISSING/SOFTWARE"), ENRICH_MISSING_SUBJECT_MESHEUROPMC( + "ENRICH/MISSING/SUBJECT/MESHEUROPMC"), ENRICH_MISSING_SUBJECT_ARXIV( + "ENRICH/MISSING/SUBJECT/ARXIV"), ENRICH_MISSING_SUBJECT_JEL( + "ENRICH/MISSING/SUBJECT/JEL"), ENRICH_MISSING_SUBJECT_DDC( + "ENRICH/MISSING/SUBJECT/DDC"), ENRICH_MISSING_SUBJECT_ACM( + "ENRICH/MISSING/SUBJECT/ACM"), ENRICH_MISSING_SUBJECT_RVK( + "ENRICH/MISSING/SUBJECT/RVK"), ENRICH_MISSING_AUTHOR_ORCID( + "ENRICH/MISSING/AUTHOR/ORCID"), // ENRICHMENT MORE - ENRICH_MORE_PID("ENRICH/MORE/PID"), - ENRICH_MORE_OA_VERSION("ENRICH/MORE/OPENACCESS_VERSION"), - ENRICH_MORE_ABSTRACT("ENRICH/MORE/ABSTRACT"), - ENRICH_MORE_PUBLICATION_DATE("ENRICH/MORE/PUBLICATION_DATE"), - ENRICH_MORE_PROJECT("ENRICH/MORE/PROJECT"), - ENRICH_MORE_SUBJECT_MESHEUROPMC("ENRICH/MORE/SUBJECT/MESHEUROPMC"), - ENRICH_MORE_SUBJECT_ARXIV("ENRICH/MORE/SUBJECT/ARXIV"), - ENRICH_MORE_SUBJECT_JEL("ENRICH/MORE/SUBJECT/JEL"), - ENRICH_MORE_SUBJECT_DDC("ENRICH/MORE/SUBJECT/DDC"), - ENRICH_MORE_SUBJECT_ACM("ENRICH/MORE/SUBJECT/ACM"), - ENRICH_MORE_SUBJECT_RVK("ENRICH/MORE/SUBJECT/RVK"), + ENRICH_MORE_PID("ENRICH/MORE/PID"), ENRICH_MORE_OA_VERSION("ENRICH/MORE/OPENACCESS_VERSION"), ENRICH_MORE_ABSTRACT( + "ENRICH/MORE/ABSTRACT"), ENRICH_MORE_PUBLICATION_DATE("ENRICH/MORE/PUBLICATION_DATE"), ENRICH_MORE_PROJECT( + "ENRICH/MORE/PROJECT"), ENRICH_MORE_SOFTWARE("ENRICH/MORE/SOFTWARE"), ENRICH_MORE_SUBJECT_MESHEUROPMC( + "ENRICH/MORE/SUBJECT/MESHEUROPMC"), ENRICH_MORE_SUBJECT_ARXIV( + "ENRICH/MORE/SUBJECT/ARXIV"), ENRICH_MORE_SUBJECT_JEL( + "ENRICH/MORE/SUBJECT/JEL"), ENRICH_MORE_SUBJECT_DDC( + "ENRICH/MORE/SUBJECT/DDC"), ENRICH_MORE_SUBJECT_ACM( + "ENRICH/MORE/SUBJECT/ACM"), ENRICH_MORE_SUBJECT_RVK("ENRICH/MORE/SUBJECT/RVK"), // ADDITION ADD_BY_PROJECT("ADD/BY_PROJECT"), // OTHER RELS - ENRICH_MISSING_PUBLICATION_IS_RELATED_TO("ENRICH/MISSING/PUBLICATION/IS_RELATED_TO"), - ENRICH_MISSING_PUBLICATION_REFERENCES("ENRICH/MISSING/PUBLICATION/REFERENCES"), - ENRICH_MISSING_PUBLICATION_IS_REFERENCED_BY("ENRICH/MISSING/PUBLICATION/IS_REFERENCED_BY"), - ENRICH_MISSING_PUBLICATION_IS_SUPPLEMENTED_TO("ENRICH/MISSING/PUBLICATION/IS_SUPPLEMENTED_TO"), - ENRICH_MISSING_PUBLICATION_IS_SUPPLEMENTED_BY("ENRICH/MISSING/PUBLICATION/IS_SUPPLEMENTED_BY"), + ENRICH_MISSING_PUBLICATION_IS_RELATED_TO( + "ENRICH/MISSING/PUBLICATION/IS_RELATED_TO"), ENRICH_MISSING_PUBLICATION_REFERENCES( + "ENRICH/MISSING/PUBLICATION/REFERENCES"), ENRICH_MISSING_PUBLICATION_IS_REFERENCED_BY( + "ENRICH/MISSING/PUBLICATION/IS_REFERENCED_BY"), ENRICH_MISSING_PUBLICATION_IS_SUPPLEMENTED_TO( + "ENRICH/MISSING/PUBLICATION/IS_SUPPLEMENTED_TO"), ENRICH_MISSING_PUBLICATION_IS_SUPPLEMENTED_BY( + "ENRICH/MISSING/PUBLICATION/IS_SUPPLEMENTED_BY"), - ENRICH_MISSING_DATASET_IS_RELATED_TO("ENRICH/MISSING/DATASET/IS_RELATED_TO"), - ENRICH_MISSING_DATASET_REFERENCES("ENRICH/MISSING/DATASET/REFERENCES"), - ENRICH_MISSING_DATASET_IS_REFERENCED_BY("ENRICH/MISSING/DATASET/IS_REFERENCED_BY"), - ENRICH_MISSING_DATASET_IS_SUPPLEMENTED_TO("ENRICH/MISSING/DATASET/IS_SUPPLEMENTED_TO"), - ENRICH_MISSING_DATASET_IS_SUPPLEMENTED_BY("ENRICH/MISSING/DATASET/IS_SUPPLEMENTED_BY"); + ENRICH_MISSING_DATASET_IS_RELATED_TO("ENRICH/MISSING/DATASET/IS_RELATED_TO"), ENRICH_MISSING_DATASET_REFERENCES( + "ENRICH/MISSING/DATASET/REFERENCES"), ENRICH_MISSING_DATASET_IS_REFERENCED_BY( + "ENRICH/MISSING/DATASET/IS_REFERENCED_BY"), ENRICH_MISSING_DATASET_IS_SUPPLEMENTED_TO( + "ENRICH/MISSING/DATASET/IS_SUPPLEMENTED_TO"), ENRICH_MISSING_DATASET_IS_SUPPLEMENTED_BY( + "ENRICH/MISSING/DATASET/IS_SUPPLEMENTED_BY"),; Topic(final String path) { this.path = path; @@ -59,7 +56,9 @@ public enum Topic { public static Topic fromPath(final String path) { for (final Topic t : Topic.values()) { - if (t.getPath().equals(path)) { return t; } + if (t.getPath().equals(path)) { + return t; + } } return null; } diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/GenerateEventsApplication.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/GenerateEventsApplication.java index 5fdd10925..fa425a181 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/GenerateEventsApplication.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/GenerateEventsApplication.java @@ -4,11 +4,14 @@ package eu.dnetlib.dhp.broker.oa; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.util.ArrayList; +import java.util.Collection; import java.util.List; +import java.util.Map; import java.util.Optional; import java.util.stream.Collectors; import org.apache.commons.io.IOUtils; +import org.apache.commons.lang3.tuple.Pair; import org.apache.hadoop.io.compress.GzipCodec; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; @@ -29,18 +32,33 @@ import eu.dnetlib.dhp.broker.model.Event; import eu.dnetlib.dhp.broker.model.EventFactory; import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingAbstract; import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingAuthorOrcid; +import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingDatasetIsReferencedBy; +import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingDatasetIsRelatedTo; +import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingDatasetIsSupplementedBy; +import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingDatasetIsSupplementedTo; +import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingDatasetReferences; import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingOpenAccess; import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingPid; import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingProject; import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingPublicationDate; +import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingPublicationIsReferencedBy; +import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingPublicationIsRelatedTo; +import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingPublicationIsSupplementedBy; +import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingPublicationIsSupplementedTo; +import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingPublicationReferences; +import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingSoftware; import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingSubject; import eu.dnetlib.dhp.broker.oa.matchers.EnrichMoreOpenAccess; import eu.dnetlib.dhp.broker.oa.matchers.EnrichMorePid; +import eu.dnetlib.dhp.broker.oa.matchers.EnrichMoreProject; +import eu.dnetlib.dhp.broker.oa.matchers.EnrichMoreSoftware; import eu.dnetlib.dhp.broker.oa.matchers.EnrichMoreSubject; import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher; +import eu.dnetlib.dhp.broker.oa.util.BrokerConstants; import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; import eu.dnetlib.dhp.common.HdfsSupport; import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct; +import eu.dnetlib.dhp.schema.oaf.Project; import eu.dnetlib.dhp.schema.oaf.Publication; import eu.dnetlib.dhp.schema.oaf.Relation; import eu.dnetlib.dhp.schema.oaf.Result; @@ -50,24 +68,44 @@ public class GenerateEventsApplication { private static final Logger log = LoggerFactory.getLogger(GenerateEventsApplication.class); - private static final UpdateMatcher enrichMissingAbstract = new EnrichMissingAbstract(); - private static final UpdateMatcher enrichMissingAuthorOrcid = new EnrichMissingAuthorOrcid(); - private static final UpdateMatcher enrichMissingOpenAccess = new EnrichMissingOpenAccess(); - private static final UpdateMatcher enrichMissingPid = new EnrichMissingPid(); - private static final UpdateMatcher enrichMissingProject = new EnrichMissingProject(); - private static final UpdateMatcher enrichMissingPublicationDate = new EnrichMissingPublicationDate(); - private static final UpdateMatcher enrichMissingSubject = new EnrichMissingSubject(); - private static final UpdateMatcher enrichMoreOpenAccess = new EnrichMoreOpenAccess(); - private static final UpdateMatcher enrichMorePid = new EnrichMorePid(); - private static final UpdateMatcher enrichMoreSubject = new EnrichMoreSubject(); + // Simple Matchers + private static final UpdateMatcher enrichMissingAbstract = new EnrichMissingAbstract(); + private static final UpdateMatcher enrichMissingAuthorOrcid = new EnrichMissingAuthorOrcid(); + private static final UpdateMatcher enrichMissingOpenAccess = new EnrichMissingOpenAccess(); + private static final UpdateMatcher enrichMissingPid = new EnrichMissingPid(); + private static final UpdateMatcher enrichMissingPublicationDate = new EnrichMissingPublicationDate(); + private static final UpdateMatcher enrichMissingSubject = new EnrichMissingSubject(); + private static final UpdateMatcher enrichMoreOpenAccess = new EnrichMoreOpenAccess(); + private static final UpdateMatcher enrichMorePid = new EnrichMorePid(); + private static final UpdateMatcher enrichMoreSubject = new EnrichMoreSubject(); + + // Advanced matchers + private static final UpdateMatcher>, ?> enrichMissingProject = new EnrichMissingProject(); + private static final UpdateMatcher>, ?> enrichMoreProject = new EnrichMoreProject(); + + private static final UpdateMatcher>, ?> enrichMissingSoftware = new EnrichMissingSoftware(); + private static final UpdateMatcher>, ?> enrichMoreSoftware = new EnrichMoreSoftware(); + + private static final UpdateMatcher>, ?> enrichMisissingPublicationIsRelatedTo = new EnrichMissingPublicationIsRelatedTo(); + private static final UpdateMatcher>, ?> enrichMissingPublicationIsReferencedBy = new EnrichMissingPublicationIsReferencedBy(); + private static final UpdateMatcher>, ?> enrichMissingPublicationReferences = new EnrichMissingPublicationReferences(); + private static final UpdateMatcher>, ?> enrichMissingPublicationIsSupplementedTo = new EnrichMissingPublicationIsSupplementedTo(); + private static final UpdateMatcher>, ?> enrichMissingPublicationIsSupplementedBy = new EnrichMissingPublicationIsSupplementedBy(); + + private static final UpdateMatcher>, ?> enrichMisissingDatasetIsRelatedTo = new EnrichMissingDatasetIsRelatedTo(); + private static final UpdateMatcher>, ?> enrichMissingDatasetIsReferencedBy = new EnrichMissingDatasetIsReferencedBy(); + private static final UpdateMatcher>, ?> enrichMissingDatasetReferences = new EnrichMissingDatasetReferences(); + private static final UpdateMatcher>, ?> enrichMissingDatasetIsSupplementedTo = new EnrichMissingDatasetIsSupplementedTo(); + private static final UpdateMatcher>, ?> enrichMissingDatasetIsSupplementedBy = new EnrichMissingDatasetIsSupplementedBy(); public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); public static void main(final String[] args) throws Exception { final ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils - .toString(GenerateEventsApplication.class - .getResourceAsStream("/eu/dnetlib/dhp/oa/graph/merge_claims_parameters.json"))); + .toString( + GenerateEventsApplication.class + .getResourceAsStream("/eu/dnetlib/dhp/oa/graph/merge_claims_parameters.json"))); parser.parseArgument(args); final Boolean isSparkSessionManaged = Optional @@ -82,9 +120,6 @@ public class GenerateEventsApplication { final String eventsPath = parser.get("eventsPath"); log.info("eventsPath: {}", eventsPath); - final String resultClassName = parser.get("resultTableName"); - log.info("resultTableName: {}", resultClassName); - final SparkConf conf = new SparkConf(); runWithSparkSession(conf, isSparkSessionManaged, spark -> { @@ -111,17 +146,17 @@ public class GenerateEventsApplication { final String graphPath, final Class resultClazz) { - final Dataset results = - readPath(spark, graphPath + "/" + resultClazz.getSimpleName().toLowerCase(), resultClazz) + final Dataset results = readPath( + spark, graphPath + "/" + resultClazz.getSimpleName().toLowerCase(), resultClazz) .filter(r -> r.getDataInfo().getDeletedbyinference()); - final Dataset rels = - readPath(spark, graphPath + "/relation", Relation.class) - .filter(r -> r.getRelClass().equals("TODO")); // TODO mergedIN + final Dataset rels = readPath(spark, graphPath + "/relation", Relation.class) + .filter(r -> r.getRelClass().equals(BrokerConstants.IS_MERGED_IN_CLASS)); final Column c = null; // TODO - final Dataset aa = results.joinWith(rels, results.col("id").equalTo(rels.col("source")), "inner") + final Dataset aa = results + .joinWith(rels, results.col("id").equalTo(rels.col("source")), "inner") .groupBy(rels.col("target")) .agg(c) .filter(x -> x.size() > 1) @@ -134,7 +169,7 @@ public class GenerateEventsApplication { } - private List generateSimpleEvents(final Result... children) { + private List generateSimpleEvents(final Collection children) { final List> list = new ArrayList<>(); for (final Result target : children) { @@ -142,7 +177,6 @@ public class GenerateEventsApplication { list.addAll(enrichMissingAuthorOrcid.searchUpdatesForRecord(target, children)); list.addAll(enrichMissingOpenAccess.searchUpdatesForRecord(target, children)); list.addAll(enrichMissingPid.searchUpdatesForRecord(target, children)); - list.addAll(enrichMissingProject.searchUpdatesForRecord(target, children)); list.addAll(enrichMissingPublicationDate.searchUpdatesForRecord(target, children)); list.addAll(enrichMissingSubject.searchUpdatesForRecord(target, children)); list.addAll(enrichMoreOpenAccess.searchUpdatesForRecord(target, children)); @@ -153,6 +187,87 @@ public class GenerateEventsApplication { return list.stream().map(EventFactory::newBrokerEvent).collect(Collectors.toList()); } + private List generateProjectsEvents(final Collection>> childrenWithProjects) { + final List> list = new ArrayList<>(); + + for (final Pair> target : childrenWithProjects) { + list.addAll(enrichMissingProject.searchUpdatesForRecord(target, childrenWithProjects)); + list.addAll(enrichMoreProject.searchUpdatesForRecord(target, childrenWithProjects)); + } + + return list.stream().map(EventFactory::newBrokerEvent).collect(Collectors.toList()); + } + + private List generateSoftwareEvents(final Collection>> childrenWithSoftwares) { + final List> list = new ArrayList<>(); + + for (final Pair> target : childrenWithSoftwares) { + list.addAll(enrichMissingSoftware.searchUpdatesForRecord(target, childrenWithSoftwares)); + list.addAll(enrichMoreSoftware.searchUpdatesForRecord(target, childrenWithSoftwares)); + } + return list.stream().map(EventFactory::newBrokerEvent).collect(Collectors.toList()); + } + + private List generatePublicationRelatedEvents(final String relType, + final Collection>>> childrenWithRels) { + + final List> list = new ArrayList<>(); + + final List>> cleanedChildrens = childrenWithRels + .stream() + .filter(p -> p.getRight().containsKey(relType)) + .map(p -> Pair.of(p.getLeft(), p.getRight().get(relType))) + .filter(p -> p.getRight().size() > 0) + .collect(Collectors.toList()); + + for (final Pair> target : cleanedChildrens) { + if (relType.equals("isRelatedTo")) { + list.addAll(enrichMisissingPublicationIsRelatedTo.searchUpdatesForRecord(target, cleanedChildrens)); + } else if (relType.equals("references")) { + list.addAll(enrichMissingPublicationReferences.searchUpdatesForRecord(target, cleanedChildrens)); + } else if (relType.equals("isReferencedBy")) { + list.addAll(enrichMissingPublicationIsReferencedBy.searchUpdatesForRecord(target, cleanedChildrens)); + } else if (relType.equals("isSupplementedTo")) { + list.addAll(enrichMissingPublicationIsSupplementedTo.searchUpdatesForRecord(target, cleanedChildrens)); + } else if (relType.equals("isSupplementedBy")) { + list.addAll(enrichMissingPublicationIsSupplementedBy.searchUpdatesForRecord(target, cleanedChildrens)); + } + } + + return list.stream().map(EventFactory::newBrokerEvent).collect(Collectors.toList()); + + } + + private List generateDatasetRelatedEvents(final String relType, + final Collection>>> childrenWithRels) { + + final List> list = new ArrayList<>(); + + final List>> cleanedChildrens = childrenWithRels + .stream() + .filter(p -> p.getRight().containsKey(relType)) + .map(p -> Pair.of(p.getLeft(), p.getRight().get(relType))) + .filter(p -> p.getRight().size() > 0) + .collect(Collectors.toList()); + + for (final Pair> target : cleanedChildrens) { + if (relType.equals("isRelatedTo")) { + list.addAll(enrichMisissingDatasetIsRelatedTo.searchUpdatesForRecord(target, cleanedChildrens)); + } else if (relType.equals("references")) { + list.addAll(enrichMissingDatasetReferences.searchUpdatesForRecord(target, cleanedChildrens)); + } else if (relType.equals("isReferencedBy")) { + list.addAll(enrichMissingDatasetIsReferencedBy.searchUpdatesForRecord(target, cleanedChildrens)); + } else if (relType.equals("isSupplementedTo")) { + list.addAll(enrichMissingDatasetIsSupplementedTo.searchUpdatesForRecord(target, cleanedChildrens)); + } else if (relType.equals("isSupplementedBy")) { + list.addAll(enrichMissingDatasetIsSupplementedBy.searchUpdatesForRecord(target, cleanedChildrens)); + } + } + + return list.stream().map(EventFactory::newBrokerEvent).collect(Collectors.toList()); + + } + public static Dataset readPath( final SparkSession spark, final String inputPath, diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingAbstract.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingAbstract.java index 43cf738f8..6dab6355f 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingAbstract.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingAbstract.java @@ -9,7 +9,7 @@ import eu.dnetlib.dhp.broker.model.Topic; import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; import eu.dnetlib.dhp.schema.oaf.Result; -public class EnrichMissingAbstract extends UpdateMatcher { +public class EnrichMissingAbstract extends UpdateMatcher { public EnrichMissingAbstract() { super(false); @@ -24,7 +24,8 @@ public class EnrichMissingAbstract extends UpdateMatcher { } @Override - public UpdateInfo generateUpdateInfo(final String highlightValue, final Result source, + public UpdateInfo generateUpdateInfo(final String highlightValue, + final Result source, final Result target) { return new UpdateInfo<>( Topic.ENRICH_MISSING_ABSTRACT, diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingAuthorOrcid.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingAuthorOrcid.java index beeccdbe8..c7146ad79 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingAuthorOrcid.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingAuthorOrcid.java @@ -10,7 +10,7 @@ import eu.dnetlib.dhp.broker.model.Topic; import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; import eu.dnetlib.dhp.schema.oaf.Result; -public class EnrichMissingAuthorOrcid extends UpdateMatcher> { +public class EnrichMissingAuthorOrcid extends UpdateMatcher> { public EnrichMissingAuthorOrcid() { super(true); @@ -24,7 +24,8 @@ public class EnrichMissingAuthorOrcid extends UpdateMatcher @Override public UpdateInfo> generateUpdateInfo(final Pair highlightValue, - final Result source, final Result target) { + final Result source, + final Result target) { return new UpdateInfo<>( Topic.ENRICH_MISSING_AUTHOR_ORCID, highlightValue, source, target, diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingDatasetIsReferencedBy.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingDatasetIsReferencedBy.java new file mode 100644 index 000000000..3b9326fef --- /dev/null +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingDatasetIsReferencedBy.java @@ -0,0 +1,38 @@ + +package eu.dnetlib.dhp.broker.oa.matchers; + +import java.util.List; + +import org.apache.commons.lang3.tuple.Pair; + +import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; +import eu.dnetlib.dhp.schema.oaf.Dataset; +import eu.dnetlib.dhp.schema.oaf.Result; + +public class EnrichMissingDatasetIsReferencedBy + extends UpdateMatcher>, eu.dnetlib.broker.objects.Dataset> { + + public EnrichMissingDatasetIsReferencedBy() { + super(true); + } + + @Override + protected List> findUpdates(final Pair> source, + final Pair> target) { + // TODO Auto-generated method stub + return null; + } + + @Override + protected UpdateInfo generateUpdateInfo( + final eu.dnetlib.broker.objects.Dataset highlightValue, + final Pair> source, + final Pair> target) { + return new UpdateInfo<>( + Topic.ENRICH_MISSING_DATASET_IS_REFERENCED_BY, + highlightValue, source.getLeft(), target.getLeft(), + (p, rel) -> p.getDatasets().add(rel), + rel -> rel.getInstances().get(0).getUrl()); + } +} diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingDatasetIsRelatedTo.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingDatasetIsRelatedTo.java new file mode 100644 index 000000000..35f7c52b4 --- /dev/null +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingDatasetIsRelatedTo.java @@ -0,0 +1,38 @@ + +package eu.dnetlib.dhp.broker.oa.matchers; + +import java.util.List; + +import org.apache.commons.lang3.tuple.Pair; + +import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; +import eu.dnetlib.dhp.schema.oaf.Dataset; +import eu.dnetlib.dhp.schema.oaf.Result; + +public class EnrichMissingDatasetIsRelatedTo + extends UpdateMatcher>, eu.dnetlib.broker.objects.Dataset> { + + public EnrichMissingDatasetIsRelatedTo() { + super(true); + } + + @Override + protected List> findUpdates(final Pair> source, + final Pair> target) { + // TODO Auto-generated method stub + return null; + } + + @Override + protected UpdateInfo generateUpdateInfo( + final eu.dnetlib.broker.objects.Dataset highlightValue, + final Pair> source, + final Pair> target) { + return new UpdateInfo<>( + Topic.ENRICH_MISSING_DATASET_IS_RELATED_TO, + highlightValue, source.getLeft(), target.getLeft(), + (p, rel) -> p.getDatasets().add(rel), + rel -> rel.getInstances().get(0).getUrl()); + } +} diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingDatasetIsSupplementedBy.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingDatasetIsSupplementedBy.java new file mode 100644 index 000000000..1faa305b5 --- /dev/null +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingDatasetIsSupplementedBy.java @@ -0,0 +1,38 @@ + +package eu.dnetlib.dhp.broker.oa.matchers; + +import java.util.List; + +import org.apache.commons.lang3.tuple.Pair; + +import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; +import eu.dnetlib.dhp.schema.oaf.Dataset; +import eu.dnetlib.dhp.schema.oaf.Result; + +public class EnrichMissingDatasetIsSupplementedBy + extends UpdateMatcher>, eu.dnetlib.broker.objects.Dataset> { + + public EnrichMissingDatasetIsSupplementedBy() { + super(true); + } + + @Override + protected List> findUpdates(final Pair> source, + final Pair> target) { + // TODO Auto-generated method stub + return null; + } + + @Override + protected UpdateInfo generateUpdateInfo( + final eu.dnetlib.broker.objects.Dataset highlightValue, + final Pair> source, + final Pair> target) { + return new UpdateInfo<>( + Topic.ENRICH_MISSING_DATASET_IS_SUPPLEMENTED_BY, + highlightValue, source.getLeft(), target.getLeft(), + (p, rel) -> p.getDatasets().add(rel), + rel -> rel.getInstances().get(0).getUrl()); + } +} diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingDatasetIsSupplementedTo.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingDatasetIsSupplementedTo.java new file mode 100644 index 000000000..d1b067272 --- /dev/null +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingDatasetIsSupplementedTo.java @@ -0,0 +1,38 @@ + +package eu.dnetlib.dhp.broker.oa.matchers; + +import java.util.List; + +import org.apache.commons.lang3.tuple.Pair; + +import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; +import eu.dnetlib.dhp.schema.oaf.Dataset; +import eu.dnetlib.dhp.schema.oaf.Result; + +public class EnrichMissingDatasetIsSupplementedTo + extends UpdateMatcher>, eu.dnetlib.broker.objects.Dataset> { + + public EnrichMissingDatasetIsSupplementedTo() { + super(true); + } + + @Override + protected List> findUpdates(final Pair> source, + final Pair> target) { + // TODO Auto-generated method stub + return null; + } + + @Override + protected UpdateInfo generateUpdateInfo( + final eu.dnetlib.broker.objects.Dataset highlightValue, + final Pair> source, + final Pair> target) { + return new UpdateInfo<>( + Topic.ENRICH_MISSING_DATASET_IS_SUPPLEMENTED_TO, + highlightValue, source.getLeft(), target.getLeft(), + (p, rel) -> p.getDatasets().add(rel), + rel -> rel.getInstances().get(0).getUrl()); + } +} diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingDatasetReferences.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingDatasetReferences.java new file mode 100644 index 000000000..ce6adeba2 --- /dev/null +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingDatasetReferences.java @@ -0,0 +1,38 @@ + +package eu.dnetlib.dhp.broker.oa.matchers; + +import java.util.List; + +import org.apache.commons.lang3.tuple.Pair; + +import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; +import eu.dnetlib.dhp.schema.oaf.Dataset; +import eu.dnetlib.dhp.schema.oaf.Result; + +public class EnrichMissingDatasetReferences + extends UpdateMatcher>, eu.dnetlib.broker.objects.Dataset> { + + public EnrichMissingDatasetReferences() { + super(true); + } + + @Override + protected List> findUpdates(final Pair> source, + final Pair> target) { + // TODO Auto-generated method stub + return null; + } + + @Override + protected UpdateInfo generateUpdateInfo( + final eu.dnetlib.broker.objects.Dataset highlightValue, + final Pair> source, + final Pair> target) { + return new UpdateInfo<>( + Topic.ENRICH_MISSING_DATASET_REFERENCES, + highlightValue, source.getLeft(), target.getLeft(), + (p, rel) -> p.getDatasets().add(rel), + rel -> rel.getInstances().get(0).getUrl()); + } +} diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingOpenAccess.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingOpenAccess.java index a4a2ea0c6..81263c6c3 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingOpenAccess.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingOpenAccess.java @@ -12,7 +12,7 @@ import eu.dnetlib.dhp.broker.oa.util.ConversionUtils; import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; import eu.dnetlib.dhp.schema.oaf.Result; -public class EnrichMissingOpenAccess extends UpdateMatcher { +public class EnrichMissingOpenAccess extends UpdateMatcher { public EnrichMissingOpenAccess() { super(true); diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPid.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPid.java index a8df62541..5f10bb4d9 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPid.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPid.java @@ -11,7 +11,7 @@ import eu.dnetlib.dhp.broker.oa.util.ConversionUtils; import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; import eu.dnetlib.dhp.schema.oaf.Result; -public class EnrichMissingPid extends UpdateMatcher { +public class EnrichMissingPid extends UpdateMatcher { public EnrichMissingPid() { super(true); diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingProject.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingProject.java index b6e5b3b57..0197c99b1 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingProject.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingProject.java @@ -4,30 +4,35 @@ package eu.dnetlib.dhp.broker.oa.matchers; import java.util.Arrays; import java.util.List; -import eu.dnetlib.broker.objects.Project; +import org.apache.commons.lang3.tuple.Pair; + import eu.dnetlib.dhp.broker.model.Topic; import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; +import eu.dnetlib.dhp.schema.oaf.Project; import eu.dnetlib.dhp.schema.oaf.Result; -public class EnrichMissingProject extends UpdateMatcher { +public class EnrichMissingProject + extends UpdateMatcher>, eu.dnetlib.broker.objects.Project> { public EnrichMissingProject() { super(true); } @Override - protected List> findUpdates(final Result source, final Result target) { - // return Arrays.asList(new EnrichMissingAbstract("xxxxxxx", 0.9f)); + protected List> findUpdates(final Pair> source, + final Pair> target) { + // TODO return Arrays.asList(); } @Override - public UpdateInfo generateUpdateInfo(final Project highlightValue, - final Result source, - final Result target) { + public UpdateInfo generateUpdateInfo( + final eu.dnetlib.broker.objects.Project highlightValue, + final Pair> source, + final Pair> target) { return new UpdateInfo<>( Topic.ENRICH_MISSING_PROJECT, - highlightValue, source, target, + highlightValue, source.getLeft(), target.getLeft(), (p, prj) -> p.getProjects().add(prj), prj -> prj.getFunder() + "::" + prj.getFundingProgram() + prj.getCode()); } diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPublicationDate.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPublicationDate.java index 372a4e4c9..19ef2bab7 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPublicationDate.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPublicationDate.java @@ -9,7 +9,7 @@ import eu.dnetlib.dhp.broker.model.Topic; import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; import eu.dnetlib.dhp.schema.oaf.Result; -public class EnrichMissingPublicationDate extends UpdateMatcher { +public class EnrichMissingPublicationDate extends UpdateMatcher { public EnrichMissingPublicationDate() { super(false); diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPublicationIsReferencedBy.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPublicationIsReferencedBy.java new file mode 100644 index 000000000..8bcee5a1f --- /dev/null +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPublicationIsReferencedBy.java @@ -0,0 +1,42 @@ + +package eu.dnetlib.dhp.broker.oa.matchers; + +import java.util.Arrays; +import java.util.List; + +import org.apache.commons.lang3.tuple.Pair; + +import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; +import eu.dnetlib.dhp.schema.oaf.Publication; +import eu.dnetlib.dhp.schema.oaf.Result; + +public class EnrichMissingPublicationIsReferencedBy + extends UpdateMatcher>, eu.dnetlib.broker.objects.Publication> { + + public EnrichMissingPublicationIsReferencedBy() { + super(true); + } + + @Override + protected List> findUpdates( + final Pair> source, + final Pair> target) { + // TODO Auto-generated method stub + return Arrays.asList(); + } + + @Override + protected UpdateInfo generateUpdateInfo( + final eu.dnetlib.broker.objects.Publication highlightValue, + final Pair> source, + final Pair> target) { + return new UpdateInfo<>( + Topic.ENRICH_MISSING_PUBLICATION_IS_REFERENCED_BY, + highlightValue, source.getLeft(), target.getLeft(), + (p, rel) -> { + }, // p.getPublications().add(rel), //TODO available in the future release of dnet-openaire-broker-common + rel -> rel.getOriginalId()); + } + +} diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPublicationIsRelatedTo.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPublicationIsRelatedTo.java new file mode 100644 index 000000000..0c16f9f56 --- /dev/null +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPublicationIsRelatedTo.java @@ -0,0 +1,42 @@ + +package eu.dnetlib.dhp.broker.oa.matchers; + +import java.util.Arrays; +import java.util.List; + +import org.apache.commons.lang3.tuple.Pair; + +import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; +import eu.dnetlib.dhp.schema.oaf.Publication; +import eu.dnetlib.dhp.schema.oaf.Result; + +public class EnrichMissingPublicationIsRelatedTo + extends UpdateMatcher>, eu.dnetlib.broker.objects.Publication> { + + public EnrichMissingPublicationIsRelatedTo() { + super(true); + } + + @Override + protected List> findUpdates( + final Pair> source, + final Pair> target) { + // TODO Auto-generated method stub + return Arrays.asList(); + } + + @Override + protected UpdateInfo generateUpdateInfo( + final eu.dnetlib.broker.objects.Publication highlightValue, + final Pair> source, + final Pair> target) { + return new UpdateInfo<>( + Topic.ENRICH_MISSING_PUBLICATION_IS_RELATED_TO, + highlightValue, source.getLeft(), target.getLeft(), + (p, rel) -> { + }, // p.getPublications().add(rel), //TODO available in the future release of dnet-openaire-broker-common + rel -> rel.getOriginalId()); + } + +} diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPublicationIsSupplementedBy.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPublicationIsSupplementedBy.java new file mode 100644 index 000000000..0b3c33270 --- /dev/null +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPublicationIsSupplementedBy.java @@ -0,0 +1,42 @@ + +package eu.dnetlib.dhp.broker.oa.matchers; + +import java.util.Arrays; +import java.util.List; + +import org.apache.commons.lang3.tuple.Pair; + +import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; +import eu.dnetlib.dhp.schema.oaf.Publication; +import eu.dnetlib.dhp.schema.oaf.Result; + +public class EnrichMissingPublicationIsSupplementedBy + extends UpdateMatcher>, eu.dnetlib.broker.objects.Publication> { + + public EnrichMissingPublicationIsSupplementedBy() { + super(true); + } + + @Override + protected List> findUpdates( + final Pair> source, + final Pair> target) { + // TODO Auto-generated method stub + return Arrays.asList(); + } + + @Override + protected UpdateInfo generateUpdateInfo( + final eu.dnetlib.broker.objects.Publication highlightValue, + final Pair> source, + final Pair> target) { + return new UpdateInfo<>( + Topic.ENRICH_MISSING_PUBLICATION_IS_SUPPLEMENTED_BY, + highlightValue, source.getLeft(), target.getLeft(), + (p, rel) -> { + }, // p.getPublications().add(rel), //TODO available in the future release of dnet-openaire-broker-common + rel -> rel.getOriginalId()); + } + +} diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPublicationIsSupplementedTo.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPublicationIsSupplementedTo.java new file mode 100644 index 000000000..0e72a8423 --- /dev/null +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPublicationIsSupplementedTo.java @@ -0,0 +1,42 @@ + +package eu.dnetlib.dhp.broker.oa.matchers; + +import java.util.Arrays; +import java.util.List; + +import org.apache.commons.lang3.tuple.Pair; + +import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; +import eu.dnetlib.dhp.schema.oaf.Publication; +import eu.dnetlib.dhp.schema.oaf.Result; + +public class EnrichMissingPublicationIsSupplementedTo + extends UpdateMatcher>, eu.dnetlib.broker.objects.Publication> { + + public EnrichMissingPublicationIsSupplementedTo() { + super(true); + } + + @Override + protected List> findUpdates( + final Pair> source, + final Pair> target) { + // TODO Auto-generated method stub + return Arrays.asList(); + } + + @Override + protected UpdateInfo generateUpdateInfo( + final eu.dnetlib.broker.objects.Publication highlightValue, + final Pair> source, + final Pair> target) { + return new UpdateInfo<>( + Topic.ENRICH_MISSING_PUBLICATION_IS_SUPPLEMENTED_TO, + highlightValue, source.getLeft(), target.getLeft(), + (p, rel) -> { + }, // p.getPublications().add(rel), //TODO available in the future release of dnet-openaire-broker-common + rel -> rel.getOriginalId()); + } + +} diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPublicationReferences.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPublicationReferences.java new file mode 100644 index 000000000..6d1124974 --- /dev/null +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPublicationReferences.java @@ -0,0 +1,42 @@ + +package eu.dnetlib.dhp.broker.oa.matchers; + +import java.util.Arrays; +import java.util.List; + +import org.apache.commons.lang3.tuple.Pair; + +import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; +import eu.dnetlib.dhp.schema.oaf.Publication; +import eu.dnetlib.dhp.schema.oaf.Result; + +public class EnrichMissingPublicationReferences + extends UpdateMatcher>, eu.dnetlib.broker.objects.Publication> { + + public EnrichMissingPublicationReferences() { + super(true); + } + + @Override + protected List> findUpdates( + final Pair> source, + final Pair> target) { + // TODO Auto-generated method stub + return Arrays.asList(); + } + + @Override + protected UpdateInfo generateUpdateInfo( + final eu.dnetlib.broker.objects.Publication highlightValue, + final Pair> source, + final Pair> target) { + return new UpdateInfo<>( + Topic.ENRICH_MISSING_PUBLICATION_REFERENCES, + highlightValue, source.getLeft(), target.getLeft(), + (p, rel) -> { + }, // p.getPublications().add(rel), //TODO available in the future release of dnet-openaire-broker-common + rel -> rel.getOriginalId()); + } + +} diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingSoftware.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingSoftware.java new file mode 100644 index 000000000..954ee48be --- /dev/null +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingSoftware.java @@ -0,0 +1,41 @@ + +package eu.dnetlib.dhp.broker.oa.matchers; + +import java.util.Arrays; +import java.util.List; + +import org.apache.commons.lang3.tuple.Pair; + +import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; +import eu.dnetlib.dhp.schema.oaf.Result; +import eu.dnetlib.dhp.schema.oaf.Software; + +public class EnrichMissingSoftware + extends UpdateMatcher>, eu.dnetlib.broker.objects.Software> { + + public EnrichMissingSoftware() { + super(true); + } + + @Override + protected List> findUpdates( + final Pair> source, + final Pair> target) { + // TODO + return Arrays.asList(); + } + + @Override + public UpdateInfo generateUpdateInfo( + final eu.dnetlib.broker.objects.Software highlightValue, + final Pair> source, + final Pair> target) { + return new UpdateInfo<>( + Topic.ENRICH_MISSING_SOFTWARE, + highlightValue, source.getLeft(), target.getLeft(), + (p, s) -> p.getSoftwares().add(s), + s -> s.getName()); + } + +} diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingSubject.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingSubject.java index 79e9d469b..a7c72f6ea 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingSubject.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingSubject.java @@ -14,7 +14,7 @@ import eu.dnetlib.dhp.schema.oaf.Qualifier; import eu.dnetlib.dhp.schema.oaf.Result; import eu.dnetlib.dhp.schema.oaf.StructuredProperty; -public class EnrichMissingSubject extends UpdateMatcher> { +public class EnrichMissingSubject extends UpdateMatcher> { public EnrichMissingSubject() { super(true); diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMoreOpenAccess.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMoreOpenAccess.java index 40c9b0500..2cd2775c9 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMoreOpenAccess.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMoreOpenAccess.java @@ -12,7 +12,7 @@ import eu.dnetlib.dhp.broker.oa.util.ConversionUtils; import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; import eu.dnetlib.dhp.schema.oaf.Result; -public class EnrichMoreOpenAccess extends UpdateMatcher { +public class EnrichMoreOpenAccess extends UpdateMatcher { public EnrichMoreOpenAccess() { super(true); diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMorePid.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMorePid.java index 0e7b7766a..048d19747 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMorePid.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMorePid.java @@ -11,7 +11,7 @@ import eu.dnetlib.dhp.broker.oa.util.ConversionUtils; import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; import eu.dnetlib.dhp.schema.oaf.Result; -public class EnrichMorePid extends UpdateMatcher { +public class EnrichMorePid extends UpdateMatcher { public EnrichMorePid() { super(true); diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMoreProject.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMoreProject.java new file mode 100644 index 000000000..4bf45d943 --- /dev/null +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMoreProject.java @@ -0,0 +1,39 @@ + +package eu.dnetlib.dhp.broker.oa.matchers; + +import java.util.Arrays; +import java.util.List; + +import org.apache.commons.lang3.tuple.Pair; + +import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; +import eu.dnetlib.dhp.schema.oaf.Project; +import eu.dnetlib.dhp.schema.oaf.Result; + +public class EnrichMoreProject extends UpdateMatcher>, eu.dnetlib.broker.objects.Project> { + + public EnrichMoreProject() { + super(true); + } + + @Override + protected List> findUpdates(final Pair> source, + final Pair> target) { + // TODO + return Arrays.asList(); + } + + @Override + public UpdateInfo generateUpdateInfo( + final eu.dnetlib.broker.objects.Project highlightValue, + final Pair> source, + final Pair> target) { + return new UpdateInfo<>( + Topic.ENRICH_MORE_PROJECT, + highlightValue, source.getLeft(), target.getLeft(), + (p, prj) -> p.getProjects().add(prj), + prj -> prj.getFunder() + "::" + prj.getFundingProgram() + prj.getCode()); + } + +} diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMoreSoftware.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMoreSoftware.java new file mode 100644 index 000000000..9760504f6 --- /dev/null +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMoreSoftware.java @@ -0,0 +1,41 @@ + +package eu.dnetlib.dhp.broker.oa.matchers; + +import java.util.Arrays; +import java.util.List; + +import org.apache.commons.lang3.tuple.Pair; + +import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; +import eu.dnetlib.dhp.schema.oaf.Result; +import eu.dnetlib.dhp.schema.oaf.Software; + +public class EnrichMoreSoftware + extends UpdateMatcher>, eu.dnetlib.broker.objects.Software> { + + public EnrichMoreSoftware() { + super(true); + } + + @Override + protected List> findUpdates( + final Pair> source, + final Pair> target) { + // TODO + return Arrays.asList(); + } + + @Override + public UpdateInfo generateUpdateInfo( + final eu.dnetlib.broker.objects.Software highlightValue, + final Pair> source, + final Pair> target) { + return new UpdateInfo<>( + Topic.ENRICH_MORE_SOFTWARE, + highlightValue, source.getLeft(), target.getLeft(), + (p, s) -> p.getSoftwares().add(s), + s -> s.getName()); + } + +} diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMoreSubject.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMoreSubject.java index e6374479b..67c2f0116 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMoreSubject.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMoreSubject.java @@ -12,7 +12,7 @@ import eu.dnetlib.dhp.broker.oa.util.ConversionUtils; import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; import eu.dnetlib.dhp.schema.oaf.Result; -public class EnrichMoreSubject extends UpdateMatcher> { +public class EnrichMoreSubject extends UpdateMatcher> { public EnrichMoreSubject() { super(true); diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcher.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcher.java index d91b03200..5bfe108a5 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcher.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcher.java @@ -12,9 +12,8 @@ import org.apache.commons.lang3.StringUtils; import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; import eu.dnetlib.dhp.schema.oaf.Field; -import eu.dnetlib.dhp.schema.oaf.Result; -public abstract class UpdateMatcher { +public abstract class UpdateMatcher { private final boolean multipleUpdate; @@ -22,15 +21,16 @@ public abstract class UpdateMatcher { this.multipleUpdate = multipleUpdate; } - public Collection> searchUpdatesForRecord(final Result res, final Result... others) { + public Collection> searchUpdatesForRecord(final K res, final Collection others) { final Map> infoMap = new HashMap<>(); - for (final Result source : others) { + for (final K source : others) { if (source != res) { for (final UpdateInfo info : findUpdates(source, res)) { final String s = DigestUtils.md5Hex(info.getHighlightValueAsString()); - if (!infoMap.containsKey(s) || infoMap.get(s).getTrust() < info.getTrust()) {} else { + if (!infoMap.containsKey(s) || infoMap.get(s).getTrust() < info.getTrust()) { + } else { infoMap.put(s, info); } } @@ -51,11 +51,11 @@ public abstract class UpdateMatcher { } } - protected abstract List> findUpdates(Result source, Result target); + protected abstract List> findUpdates(K source, K target); protected abstract UpdateInfo generateUpdateInfo(final T highlightValue, - final Result source, - final Result target); + final K source, + final K target); protected static boolean isMissing(final List> list) { return list == null || list.isEmpty() || StringUtils.isBlank(list.get(0).getValue()); diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/BrokerConstants.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/BrokerConstants.java index d61d5bfb7..8e97192bf 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/BrokerConstants.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/BrokerConstants.java @@ -4,4 +4,6 @@ package eu.dnetlib.dhp.broker.oa.util; public class BrokerConstants { public final static String OPEN_ACCESS = "OPEN"; + public final static String IS_MERGED_IN_CLASS = "isMergedIn"; + }