forked from antonis.lempesis/dnet-hadoop
limits
This commit is contained in:
parent
408165a756
commit
2393d9da2f
|
@ -1,13 +1,14 @@
|
|||
|
||||
package eu.dnetlib.dhp.broker.oa.matchers;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.function.BiConsumer;
|
||||
import java.util.function.Function;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.apache.commons.codec.digest.DigestUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
@ -19,15 +20,15 @@ import eu.dnetlib.pace.config.DedupConfig;
|
|||
|
||||
public abstract class UpdateMatcher<T> {
|
||||
|
||||
private final boolean multipleUpdate;
|
||||
private final int maxNumber;
|
||||
private final Function<T, Topic> topicFunction;
|
||||
private final BiConsumer<OaBrokerMainEntity, T> compileHighlightFunction;
|
||||
private final Function<T, String> highlightToStringFunction;
|
||||
|
||||
public UpdateMatcher(final boolean multipleUpdate, final Function<T, Topic> topicFunction,
|
||||
public UpdateMatcher(final int maxNumber, final Function<T, Topic> topicFunction,
|
||||
final BiConsumer<OaBrokerMainEntity, T> compileHighlightFunction,
|
||||
final Function<T, String> highlightToStringFunction) {
|
||||
this.multipleUpdate = multipleUpdate;
|
||||
this.maxNumber = maxNumber;
|
||||
this.topicFunction = topicFunction;
|
||||
this.compileHighlightFunction = compileHighlightFunction;
|
||||
this.highlightToStringFunction = highlightToStringFunction;
|
||||
|
@ -57,17 +58,19 @@ public abstract class UpdateMatcher<T> {
|
|||
}
|
||||
}
|
||||
|
||||
final Collection<UpdateInfo<T>> values = infoMap.values();
|
||||
|
||||
if (values.isEmpty() || multipleUpdate) {
|
||||
return values;
|
||||
} else {
|
||||
final UpdateInfo<T> v = values
|
||||
final List<UpdateInfo<T>> values = infoMap
|
||||
.values()
|
||||
.stream()
|
||||
.sorted((o1, o2) -> Float.compare(o1.getTrust(), o2.getTrust()))
|
||||
.findFirst()
|
||||
.get();
|
||||
return Arrays.asList(v);
|
||||
.sorted((o1, o2) -> Float.compare(o2.getTrust(), o1.getTrust())) // DESCENDING
|
||||
.collect(Collectors.toList());
|
||||
|
||||
if (values.isEmpty()) {
|
||||
return new ArrayList<>();
|
||||
} else if (values.size() > maxNumber) {
|
||||
System.err.println("Too many events (" + values.size() + ") matched by " + getClass().getSimpleName());
|
||||
return values.subList(0, maxNumber);
|
||||
} else {
|
||||
return values;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -81,8 +84,8 @@ public abstract class UpdateMatcher<T> {
|
|||
return StringUtils.isBlank(field);
|
||||
}
|
||||
|
||||
public boolean isMultipleUpdate() {
|
||||
return multipleUpdate;
|
||||
public int getMaxNumber() {
|
||||
return maxNumber;
|
||||
}
|
||||
|
||||
public Function<T, Topic> getTopicFunction() {
|
||||
|
|
|
@ -13,7 +13,7 @@ import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher;
|
|||
public abstract class AbstractEnrichMissingDataset extends UpdateMatcher<OaBrokerRelatedDataset> {
|
||||
|
||||
public AbstractEnrichMissingDataset(final Topic topic) {
|
||||
super(true,
|
||||
super(10,
|
||||
rel -> topic,
|
||||
(p, rel) -> p.getDatasets().add(rel),
|
||||
rel -> rel.getOpenaireId());
|
||||
|
|
|
@ -12,7 +12,7 @@ import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher;
|
|||
public class EnrichMissingProject extends UpdateMatcher<OaBrokerProject> {
|
||||
|
||||
public EnrichMissingProject() {
|
||||
super(true,
|
||||
super(20,
|
||||
prj -> Topic.ENRICH_MISSING_PROJECT,
|
||||
(p, prj) -> p.getProjects().add(prj),
|
||||
prj -> prj.getFunder() + "::" + prj.getFundingProgram() + prj.getCode());
|
||||
|
|
|
@ -13,7 +13,7 @@ import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher;
|
|||
public class EnrichMoreProject extends UpdateMatcher<OaBrokerProject> {
|
||||
|
||||
public EnrichMoreProject() {
|
||||
super(true,
|
||||
super(20,
|
||||
prj -> Topic.ENRICH_MORE_PROJECT,
|
||||
(p, prj) -> p.getProjects().add(prj),
|
||||
prj -> projectAsString(prj));
|
||||
|
|
|
@ -13,7 +13,7 @@ import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher;
|
|||
public abstract class AbstractEnrichMissingPublication extends UpdateMatcher<OaBrokerRelatedPublication> {
|
||||
|
||||
public AbstractEnrichMissingPublication(final Topic topic) {
|
||||
super(true,
|
||||
super(10,
|
||||
rel -> topic,
|
||||
(p, rel) -> p.getPublications().add(rel),
|
||||
rel -> rel.getOpenaireId());
|
||||
|
|
|
@ -13,7 +13,7 @@ public class EnrichMissingSoftware
|
|||
extends UpdateMatcher<OaBrokerRelatedSoftware> {
|
||||
|
||||
public EnrichMissingSoftware() {
|
||||
super(true,
|
||||
super(10,
|
||||
s -> Topic.ENRICH_MISSING_SOFTWARE,
|
||||
(p, s) -> p.getSoftwares().add(s),
|
||||
s -> s.getOpenaireId());
|
||||
|
|
|
@ -13,7 +13,7 @@ import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher;
|
|||
public class EnrichMoreSoftware extends UpdateMatcher<OaBrokerRelatedSoftware> {
|
||||
|
||||
public EnrichMoreSoftware() {
|
||||
super(true,
|
||||
super(10,
|
||||
s -> Topic.ENRICH_MORE_SOFTWARE,
|
||||
(p, s) -> p.getSoftwares().add(s),
|
||||
s -> s.getOpenaireId());
|
||||
|
|
|
@ -12,7 +12,7 @@ import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher;
|
|||
public class EnrichMissingAbstract extends UpdateMatcher<String> {
|
||||
|
||||
public EnrichMissingAbstract() {
|
||||
super(false,
|
||||
super(1,
|
||||
s -> Topic.ENRICH_MISSING_ABSTRACT,
|
||||
(p, s) -> p.getAbstracts().add(s),
|
||||
s -> s);
|
||||
|
|
|
@ -15,7 +15,7 @@ import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher;
|
|||
public class EnrichMissingAuthorOrcid extends UpdateMatcher<OaBrokerAuthor> {
|
||||
|
||||
public EnrichMissingAuthorOrcid() {
|
||||
super(true,
|
||||
super(40,
|
||||
aut -> Topic.ENRICH_MISSING_AUTHOR_ORCID,
|
||||
(p, aut) -> p.getCreators().add(aut),
|
||||
aut -> aut.getOrcid());
|
||||
|
|
|
@ -14,7 +14,7 @@ import eu.dnetlib.dhp.broker.oa.util.BrokerConstants;
|
|||
public class EnrichMissingOpenAccess extends UpdateMatcher<OaBrokerInstance> {
|
||||
|
||||
public EnrichMissingOpenAccess() {
|
||||
super(true,
|
||||
super(20,
|
||||
i -> Topic.ENRICH_MISSING_OA_VERSION,
|
||||
(p, i) -> p.getInstances().add(i),
|
||||
OaBrokerInstance::getUrl);
|
||||
|
|
|
@ -13,7 +13,7 @@ import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher;
|
|||
public class EnrichMissingPid extends UpdateMatcher<OaBrokerTypedValue> {
|
||||
|
||||
public EnrichMissingPid() {
|
||||
super(true,
|
||||
super(10,
|
||||
pid -> Topic.ENRICH_MISSING_PID,
|
||||
(p, pid) -> p.getPids().add(pid),
|
||||
pid -> pid.getType() + "::" + pid.getValue());
|
||||
|
|
|
@ -12,7 +12,7 @@ import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher;
|
|||
public class EnrichMissingPublicationDate extends UpdateMatcher<String> {
|
||||
|
||||
public EnrichMissingPublicationDate() {
|
||||
super(false,
|
||||
super(1,
|
||||
date -> Topic.ENRICH_MISSING_PUBLICATION_DATE,
|
||||
(p, date) -> p.setPublicationdate(date),
|
||||
s -> s);
|
||||
|
|
|
@ -13,7 +13,7 @@ import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher;
|
|||
public class EnrichMissingSubject extends UpdateMatcher<OaBrokerTypedValue> {
|
||||
|
||||
public EnrichMissingSubject() {
|
||||
super(true,
|
||||
super(20,
|
||||
s -> Topic.fromPath("ENRICH/MISSING/SUBJECT/" + s.getType()),
|
||||
(p, s) -> p.getSubjects().add(s),
|
||||
s -> subjectAsString(s));
|
||||
|
|
|
@ -14,7 +14,7 @@ import eu.dnetlib.dhp.broker.oa.util.BrokerConstants;
|
|||
public class EnrichMoreOpenAccess extends UpdateMatcher<OaBrokerInstance> {
|
||||
|
||||
public EnrichMoreOpenAccess() {
|
||||
super(true,
|
||||
super(20,
|
||||
i -> Topic.ENRICH_MORE_OA_VERSION,
|
||||
(p, i) -> p.getInstances().add(i),
|
||||
OaBrokerInstance::getUrl);
|
||||
|
|
|
@ -13,7 +13,7 @@ import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher;
|
|||
public class EnrichMorePid extends UpdateMatcher<OaBrokerTypedValue> {
|
||||
|
||||
public EnrichMorePid() {
|
||||
super(true,
|
||||
super(20,
|
||||
pid -> Topic.ENRICH_MORE_PID,
|
||||
(p, pid) -> p.getPids().add(pid),
|
||||
pid -> pidAsString(pid));
|
||||
|
|
|
@ -13,7 +13,7 @@ import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher;
|
|||
public class EnrichMoreSubject extends UpdateMatcher<OaBrokerTypedValue> {
|
||||
|
||||
public EnrichMoreSubject() {
|
||||
super(true,
|
||||
super(20,
|
||||
s -> Topic.fromPath("ENRICH/MORE/SUBJECT/" + s.getType()),
|
||||
(p, s) -> p.getSubjects().add(s),
|
||||
s -> subjectAsString(s));
|
||||
|
|
|
@ -7,7 +7,16 @@ import java.util.List;
|
|||
import eu.dnetlib.broker.objects.OaBrokerMainEntity;
|
||||
import eu.dnetlib.dhp.broker.model.EventFactory;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.relatedProjects.EnrichMissingProject;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMissingAbstract;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMissingAuthorOrcid;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMissingOpenAccess;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMissingPid;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMissingPublicationDate;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMissingSubject;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMoreOpenAccess;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMorePid;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMoreSubject;
|
||||
import eu.dnetlib.dhp.broker.oa.util.aggregators.simple.ResultGroup;
|
||||
import eu.dnetlib.pace.config.DedupConfig;
|
||||
|
||||
|
@ -16,17 +25,17 @@ public class EventFinder {
|
|||
private static List<UpdateMatcher<?>> matchers = new ArrayList<>();
|
||||
static {
|
||||
matchers.add(new EnrichMissingAbstract());
|
||||
// matchers.add(new EnrichMissingAuthorOrcid());
|
||||
// matchers.add(new EnrichMissingOpenAccess());
|
||||
// matchers.add(new EnrichMissingPid());
|
||||
// matchers.add(new EnrichMissingPublicationDate());
|
||||
// matchers.add(new EnrichMissingSubject());
|
||||
// matchers.add(new EnrichMoreOpenAccess());
|
||||
// matchers.add(new EnrichMorePid());
|
||||
// matchers.add(new EnrichMoreSubject());
|
||||
matchers.add(new EnrichMissingAuthorOrcid());
|
||||
matchers.add(new EnrichMissingOpenAccess());
|
||||
matchers.add(new EnrichMissingPid());
|
||||
matchers.add(new EnrichMissingPublicationDate());
|
||||
matchers.add(new EnrichMissingSubject());
|
||||
matchers.add(new EnrichMoreOpenAccess());
|
||||
matchers.add(new EnrichMorePid());
|
||||
matchers.add(new EnrichMoreSubject());
|
||||
|
||||
// // Advanced matchers
|
||||
// matchers.add(new EnrichMissingProject());
|
||||
matchers.add(new EnrichMissingProject());
|
||||
// matchers.add(new EnrichMoreProject());
|
||||
// matchers.add(new EnrichMissingSoftware());
|
||||
// matchers.add(new EnrichMoreSoftware());
|
||||
|
|
Loading…
Reference in New Issue