From 2393d9da2f376890cd9fa62936aaef97b6580c8e Mon Sep 17 00:00:00 2001 From: "michele.artini" Date: Fri, 26 Jun 2020 11:20:45 +0200 Subject: [PATCH] limits --- .../dhp/broker/oa/matchers/UpdateMatcher.java | 33 ++++++++++--------- .../AbstractEnrichMissingDataset.java | 2 +- .../relatedProjects/EnrichMissingProject.java | 2 +- .../relatedProjects/EnrichMoreProject.java | 2 +- .../AbstractEnrichMissingPublication.java | 2 +- .../EnrichMissingSoftware.java | 2 +- .../relatedSoftware/EnrichMoreSoftware.java | 2 +- .../simple/EnrichMissingAbstract.java | 2 +- .../simple/EnrichMissingAuthorOrcid.java | 2 +- .../simple/EnrichMissingOpenAccess.java | 2 +- .../oa/matchers/simple/EnrichMissingPid.java | 2 +- .../simple/EnrichMissingPublicationDate.java | 2 +- .../matchers/simple/EnrichMissingSubject.java | 2 +- .../matchers/simple/EnrichMoreOpenAccess.java | 2 +- .../oa/matchers/simple/EnrichMorePid.java | 2 +- .../oa/matchers/simple/EnrichMoreSubject.java | 2 +- .../dhp/broker/oa/util/EventFinder.java | 27 ++++++++++----- 17 files changed, 51 insertions(+), 39 deletions(-) diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcher.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcher.java index c0287bda0..7f82f9a2b 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcher.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcher.java @@ -1,13 +1,14 @@ package eu.dnetlib.dhp.broker.oa.matchers; -import java.util.Arrays; +import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.function.BiConsumer; import java.util.function.Function; +import java.util.stream.Collectors; import org.apache.commons.codec.digest.DigestUtils; import org.apache.commons.lang3.StringUtils; @@ -19,15 +20,15 @@ import eu.dnetlib.pace.config.DedupConfig; public abstract class UpdateMatcher { - private final boolean multipleUpdate; + private final int maxNumber; private final Function topicFunction; private final BiConsumer compileHighlightFunction; private final Function highlightToStringFunction; - public UpdateMatcher(final boolean multipleUpdate, final Function topicFunction, + public UpdateMatcher(final int maxNumber, final Function topicFunction, final BiConsumer compileHighlightFunction, final Function highlightToStringFunction) { - this.multipleUpdate = multipleUpdate; + this.maxNumber = maxNumber; this.topicFunction = topicFunction; this.compileHighlightFunction = compileHighlightFunction; this.highlightToStringFunction = highlightToStringFunction; @@ -57,17 +58,19 @@ public abstract class UpdateMatcher { } } - final Collection> values = infoMap.values(); + final List> values = infoMap + .values() + .stream() + .sorted((o1, o2) -> Float.compare(o2.getTrust(), o1.getTrust())) // DESCENDING + .collect(Collectors.toList()); - if (values.isEmpty() || multipleUpdate) { - return values; + if (values.isEmpty()) { + return new ArrayList<>(); + } else if (values.size() > maxNumber) { + System.err.println("Too many events (" + values.size() + ") matched by " + getClass().getSimpleName()); + return values.subList(0, maxNumber); } else { - final UpdateInfo v = values - .stream() - .sorted((o1, o2) -> Float.compare(o1.getTrust(), o2.getTrust())) - .findFirst() - .get(); - return Arrays.asList(v); + return values; } } @@ -81,8 +84,8 @@ public abstract class UpdateMatcher { return StringUtils.isBlank(field); } - public boolean isMultipleUpdate() { - return multipleUpdate; + public int getMaxNumber() { + return maxNumber; } public Function getTopicFunction() { diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/AbstractEnrichMissingDataset.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/AbstractEnrichMissingDataset.java index c8b93596a..f21c1c7b3 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/AbstractEnrichMissingDataset.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/AbstractEnrichMissingDataset.java @@ -13,7 +13,7 @@ import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher; public abstract class AbstractEnrichMissingDataset extends UpdateMatcher { public AbstractEnrichMissingDataset(final Topic topic) { - super(true, + super(10, rel -> topic, (p, rel) -> p.getDatasets().add(rel), rel -> rel.getOpenaireId()); diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedProjects/EnrichMissingProject.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedProjects/EnrichMissingProject.java index 49c546bba..4b563d381 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedProjects/EnrichMissingProject.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedProjects/EnrichMissingProject.java @@ -12,7 +12,7 @@ import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher; public class EnrichMissingProject extends UpdateMatcher { public EnrichMissingProject() { - super(true, + super(20, prj -> Topic.ENRICH_MISSING_PROJECT, (p, prj) -> p.getProjects().add(prj), prj -> prj.getFunder() + "::" + prj.getFundingProgram() + prj.getCode()); diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedProjects/EnrichMoreProject.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedProjects/EnrichMoreProject.java index 6954a3fb5..85b2cbe28 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedProjects/EnrichMoreProject.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedProjects/EnrichMoreProject.java @@ -13,7 +13,7 @@ import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher; public class EnrichMoreProject extends UpdateMatcher { public EnrichMoreProject() { - super(true, + super(20, prj -> Topic.ENRICH_MORE_PROJECT, (p, prj) -> p.getProjects().add(prj), prj -> projectAsString(prj)); diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/AbstractEnrichMissingPublication.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/AbstractEnrichMissingPublication.java index cc4f68f87..f951131b1 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/AbstractEnrichMissingPublication.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/AbstractEnrichMissingPublication.java @@ -13,7 +13,7 @@ import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher; public abstract class AbstractEnrichMissingPublication extends UpdateMatcher { public AbstractEnrichMissingPublication(final Topic topic) { - super(true, + super(10, rel -> topic, (p, rel) -> p.getPublications().add(rel), rel -> rel.getOpenaireId()); diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedSoftware/EnrichMissingSoftware.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedSoftware/EnrichMissingSoftware.java index d01f0c370..a638024bc 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedSoftware/EnrichMissingSoftware.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedSoftware/EnrichMissingSoftware.java @@ -13,7 +13,7 @@ public class EnrichMissingSoftware extends UpdateMatcher { public EnrichMissingSoftware() { - super(true, + super(10, s -> Topic.ENRICH_MISSING_SOFTWARE, (p, s) -> p.getSoftwares().add(s), s -> s.getOpenaireId()); diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedSoftware/EnrichMoreSoftware.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedSoftware/EnrichMoreSoftware.java index a612b6074..2bc370187 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedSoftware/EnrichMoreSoftware.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedSoftware/EnrichMoreSoftware.java @@ -13,7 +13,7 @@ import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher; public class EnrichMoreSoftware extends UpdateMatcher { public EnrichMoreSoftware() { - super(true, + super(10, s -> Topic.ENRICH_MORE_SOFTWARE, (p, s) -> p.getSoftwares().add(s), s -> s.getOpenaireId()); diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingAbstract.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingAbstract.java index 73462bae8..b61696e45 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingAbstract.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingAbstract.java @@ -12,7 +12,7 @@ import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher; public class EnrichMissingAbstract extends UpdateMatcher { public EnrichMissingAbstract() { - super(false, + super(1, s -> Topic.ENRICH_MISSING_ABSTRACT, (p, s) -> p.getAbstracts().add(s), s -> s); diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingAuthorOrcid.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingAuthorOrcid.java index 2a01188a9..7bbc43fe3 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingAuthorOrcid.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingAuthorOrcid.java @@ -15,7 +15,7 @@ import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher; public class EnrichMissingAuthorOrcid extends UpdateMatcher { public EnrichMissingAuthorOrcid() { - super(true, + super(40, aut -> Topic.ENRICH_MISSING_AUTHOR_ORCID, (p, aut) -> p.getCreators().add(aut), aut -> aut.getOrcid()); diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingOpenAccess.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingOpenAccess.java index 487382957..41a00dcd1 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingOpenAccess.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingOpenAccess.java @@ -14,7 +14,7 @@ import eu.dnetlib.dhp.broker.oa.util.BrokerConstants; public class EnrichMissingOpenAccess extends UpdateMatcher { public EnrichMissingOpenAccess() { - super(true, + super(20, i -> Topic.ENRICH_MISSING_OA_VERSION, (p, i) -> p.getInstances().add(i), OaBrokerInstance::getUrl); diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPid.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPid.java index ee1617b1e..4863bdeb7 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPid.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPid.java @@ -13,7 +13,7 @@ import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher; public class EnrichMissingPid extends UpdateMatcher { public EnrichMissingPid() { - super(true, + super(10, pid -> Topic.ENRICH_MISSING_PID, (p, pid) -> p.getPids().add(pid), pid -> pid.getType() + "::" + pid.getValue()); diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPublicationDate.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPublicationDate.java index 2c0533fa3..e7b65dad8 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPublicationDate.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPublicationDate.java @@ -12,7 +12,7 @@ import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher; public class EnrichMissingPublicationDate extends UpdateMatcher { public EnrichMissingPublicationDate() { - super(false, + super(1, date -> Topic.ENRICH_MISSING_PUBLICATION_DATE, (p, date) -> p.setPublicationdate(date), s -> s); diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingSubject.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingSubject.java index 9ab9fce48..f762e3f52 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingSubject.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingSubject.java @@ -13,7 +13,7 @@ import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher; public class EnrichMissingSubject extends UpdateMatcher { public EnrichMissingSubject() { - super(true, + super(20, s -> Topic.fromPath("ENRICH/MISSING/SUBJECT/" + s.getType()), (p, s) -> p.getSubjects().add(s), s -> subjectAsString(s)); diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMoreOpenAccess.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMoreOpenAccess.java index e90a8f201..9ce362a97 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMoreOpenAccess.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMoreOpenAccess.java @@ -14,7 +14,7 @@ import eu.dnetlib.dhp.broker.oa.util.BrokerConstants; public class EnrichMoreOpenAccess extends UpdateMatcher { public EnrichMoreOpenAccess() { - super(true, + super(20, i -> Topic.ENRICH_MORE_OA_VERSION, (p, i) -> p.getInstances().add(i), OaBrokerInstance::getUrl); diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMorePid.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMorePid.java index 43b4f0628..583960037 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMorePid.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMorePid.java @@ -13,7 +13,7 @@ import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher; public class EnrichMorePid extends UpdateMatcher { public EnrichMorePid() { - super(true, + super(20, pid -> Topic.ENRICH_MORE_PID, (p, pid) -> p.getPids().add(pid), pid -> pidAsString(pid)); diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMoreSubject.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMoreSubject.java index 97b289b69..150029462 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMoreSubject.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMoreSubject.java @@ -13,7 +13,7 @@ import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher; public class EnrichMoreSubject extends UpdateMatcher { public EnrichMoreSubject() { - super(true, + super(20, s -> Topic.fromPath("ENRICH/MORE/SUBJECT/" + s.getType()), (p, s) -> p.getSubjects().add(s), s -> subjectAsString(s)); diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EventFinder.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EventFinder.java index 1a3f514e8..e142b5904 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EventFinder.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EventFinder.java @@ -7,7 +7,16 @@ import java.util.List; import eu.dnetlib.broker.objects.OaBrokerMainEntity; import eu.dnetlib.dhp.broker.model.EventFactory; import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher; +import eu.dnetlib.dhp.broker.oa.matchers.relatedProjects.EnrichMissingProject; import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMissingAbstract; +import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMissingAuthorOrcid; +import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMissingOpenAccess; +import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMissingPid; +import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMissingPublicationDate; +import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMissingSubject; +import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMoreOpenAccess; +import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMorePid; +import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMoreSubject; import eu.dnetlib.dhp.broker.oa.util.aggregators.simple.ResultGroup; import eu.dnetlib.pace.config.DedupConfig; @@ -16,17 +25,17 @@ public class EventFinder { private static List> matchers = new ArrayList<>(); static { matchers.add(new EnrichMissingAbstract()); - // matchers.add(new EnrichMissingAuthorOrcid()); - // matchers.add(new EnrichMissingOpenAccess()); - // matchers.add(new EnrichMissingPid()); - // matchers.add(new EnrichMissingPublicationDate()); - // matchers.add(new EnrichMissingSubject()); - // matchers.add(new EnrichMoreOpenAccess()); - // matchers.add(new EnrichMorePid()); - // matchers.add(new EnrichMoreSubject()); + matchers.add(new EnrichMissingAuthorOrcid()); + matchers.add(new EnrichMissingOpenAccess()); + matchers.add(new EnrichMissingPid()); + matchers.add(new EnrichMissingPublicationDate()); + matchers.add(new EnrichMissingSubject()); + matchers.add(new EnrichMoreOpenAccess()); + matchers.add(new EnrichMorePid()); + matchers.add(new EnrichMoreSubject()); // // Advanced matchers - // matchers.add(new EnrichMissingProject()); + matchers.add(new EnrichMissingProject()); // matchers.add(new EnrichMoreProject()); // matchers.add(new EnrichMissingSoftware()); // matchers.add(new EnrichMoreSoftware());