dnet-hadoop/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EventFinder.java

96 lines
4.4 KiB
Java
Raw Normal View History

2020-06-11 11:25:18 +02:00
package eu.dnetlib.dhp.broker.oa.util;
import java.util.ArrayList;
import java.util.List;
2020-06-29 16:33:32 +02:00
import java.util.Map;
import org.apache.spark.util.LongAccumulator;
2020-06-11 11:25:18 +02:00
2020-06-22 08:51:31 +02:00
import eu.dnetlib.broker.objects.OaBrokerMainEntity;
2020-06-11 11:25:18 +02:00
import eu.dnetlib.dhp.broker.model.EventFactory;
import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher;
2020-06-29 08:43:56 +02:00
import eu.dnetlib.dhp.broker.oa.matchers.relatedDatasets.EnrichMissingDatasetIsReferencedBy;
import eu.dnetlib.dhp.broker.oa.matchers.relatedDatasets.EnrichMissingDatasetIsRelatedTo;
import eu.dnetlib.dhp.broker.oa.matchers.relatedDatasets.EnrichMissingDatasetIsSupplementedBy;
import eu.dnetlib.dhp.broker.oa.matchers.relatedDatasets.EnrichMissingDatasetIsSupplementedTo;
import eu.dnetlib.dhp.broker.oa.matchers.relatedDatasets.EnrichMissingDatasetReferences;
2020-06-26 11:20:45 +02:00
import eu.dnetlib.dhp.broker.oa.matchers.relatedProjects.EnrichMissingProject;
2020-06-29 08:43:56 +02:00
import eu.dnetlib.dhp.broker.oa.matchers.relatedProjects.EnrichMoreProject;
import eu.dnetlib.dhp.broker.oa.matchers.relatedPublications.EnrichMissingPublicationIsReferencedBy;
import eu.dnetlib.dhp.broker.oa.matchers.relatedPublications.EnrichMissingPublicationIsRelatedTo;
import eu.dnetlib.dhp.broker.oa.matchers.relatedPublications.EnrichMissingPublicationIsSupplementedBy;
import eu.dnetlib.dhp.broker.oa.matchers.relatedPublications.EnrichMissingPublicationIsSupplementedTo;
import eu.dnetlib.dhp.broker.oa.matchers.relatedPublications.EnrichMissingPublicationReferences;
import eu.dnetlib.dhp.broker.oa.matchers.relatedSoftware.EnrichMissingSoftware;
import eu.dnetlib.dhp.broker.oa.matchers.relatedSoftware.EnrichMoreSoftware;
2020-06-11 11:25:18 +02:00
import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMissingAbstract;
2020-06-26 11:20:45 +02:00
import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMissingAuthorOrcid;
import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMissingOpenAccess;
import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMissingPid;
import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMissingPublicationDate;
import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMissingSubject;
import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMoreOpenAccess;
import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMorePid;
import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMoreSubject;
2020-06-11 11:25:18 +02:00
import eu.dnetlib.dhp.broker.oa.util.aggregators.simple.ResultGroup;
import eu.dnetlib.pace.config.DedupConfig;
public class EventFinder {
2020-06-29 16:33:32 +02:00
private static final List<UpdateMatcher<?>> matchers = new ArrayList<>();
2020-06-11 11:25:18 +02:00
static {
matchers.add(new EnrichMissingAbstract());
2020-06-26 11:20:45 +02:00
matchers.add(new EnrichMissingAuthorOrcid());
matchers.add(new EnrichMissingOpenAccess());
matchers.add(new EnrichMissingPid());
matchers.add(new EnrichMissingPublicationDate());
matchers.add(new EnrichMissingSubject());
matchers.add(new EnrichMoreOpenAccess());
matchers.add(new EnrichMorePid());
matchers.add(new EnrichMoreSubject());
2020-06-11 11:25:18 +02:00
2020-06-29 16:33:32 +02:00
// Advanced matchers
2020-06-26 11:20:45 +02:00
matchers.add(new EnrichMissingProject());
2020-06-29 08:43:56 +02:00
matchers.add(new EnrichMoreProject());
matchers.add(new EnrichMissingSoftware());
matchers.add(new EnrichMoreSoftware());
matchers.add(new EnrichMissingPublicationIsRelatedTo());
matchers.add(new EnrichMissingPublicationIsReferencedBy());
matchers.add(new EnrichMissingPublicationReferences());
matchers.add(new EnrichMissingPublicationIsSupplementedTo());
matchers.add(new EnrichMissingPublicationIsSupplementedBy());
matchers.add(new EnrichMissingDatasetIsRelatedTo());
matchers.add(new EnrichMissingDatasetIsReferencedBy());
matchers.add(new EnrichMissingDatasetReferences());
matchers.add(new EnrichMissingDatasetIsSupplementedTo());
matchers.add(new EnrichMissingDatasetIsSupplementedBy());
matchers.add(new EnrichMissingAbstract());
2020-06-11 11:25:18 +02:00
}
2020-06-29 16:33:32 +02:00
public static EventGroup generateEvents(final ResultGroup results,
final DedupConfig dedupConfig,
final Map<String, LongAccumulator> accumulators) {
2020-06-11 11:25:18 +02:00
final List<UpdateInfo<?>> list = new ArrayList<>();
2020-06-22 08:51:31 +02:00
for (final OaBrokerMainEntity target : results.getData()) {
2020-06-11 11:25:18 +02:00
for (final UpdateMatcher<?> matcher : matchers) {
2020-06-29 16:33:32 +02:00
list.addAll(matcher.searchUpdatesForRecord(target, results.getData(), dedupConfig, accumulators));
2020-06-11 11:25:18 +02:00
}
}
return asEventGroup(list);
}
private static EventGroup asEventGroup(final List<UpdateInfo<?>> list) {
final EventGroup events = new EventGroup();
list.stream().map(EventFactory::newBrokerEvent).forEach(events::addElement);
return events;
}
2020-06-29 16:33:32 +02:00
public static List<UpdateMatcher<?>> getMatchers() {
return matchers;
}
2020-06-11 11:25:18 +02:00
}