// Source: dnet-hadoop/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EventFinder.java
// (128 lines, 5.4 KiB, Java)

package eu.dnetlib.dhp.broker.oa.util;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.spark.util.LongAccumulator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import eu.dnetlib.broker.objects.OaBrokerMainEntity;
import eu.dnetlib.broker.objects.OaBrokerRelatedDatasource;
import eu.dnetlib.dhp.broker.model.EventFactory;
import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher;
import eu.dnetlib.dhp.broker.oa.matchers.relatedDatasets.EnrichMissingDatasetIsReferencedBy;
import eu.dnetlib.dhp.broker.oa.matchers.relatedDatasets.EnrichMissingDatasetIsRelatedTo;
import eu.dnetlib.dhp.broker.oa.matchers.relatedDatasets.EnrichMissingDatasetIsSupplementedBy;
import eu.dnetlib.dhp.broker.oa.matchers.relatedDatasets.EnrichMissingDatasetIsSupplementedTo;
import eu.dnetlib.dhp.broker.oa.matchers.relatedDatasets.EnrichMissingDatasetReferences;
import eu.dnetlib.dhp.broker.oa.matchers.relatedProjects.EnrichMissingProject;
import eu.dnetlib.dhp.broker.oa.matchers.relatedProjects.EnrichMoreProject;
import eu.dnetlib.dhp.broker.oa.matchers.relatedPublications.EnrichMissingPublicationIsReferencedBy;
import eu.dnetlib.dhp.broker.oa.matchers.relatedPublications.EnrichMissingPublicationIsRelatedTo;
import eu.dnetlib.dhp.broker.oa.matchers.relatedPublications.EnrichMissingPublicationIsSupplementedBy;
import eu.dnetlib.dhp.broker.oa.matchers.relatedPublications.EnrichMissingPublicationIsSupplementedTo;
import eu.dnetlib.dhp.broker.oa.matchers.relatedPublications.EnrichMissingPublicationReferences;
import eu.dnetlib.dhp.broker.oa.matchers.relatedSoftware.EnrichMissingSoftware;
import eu.dnetlib.dhp.broker.oa.matchers.relatedSoftware.EnrichMoreSoftware;
import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMissingAbstract;
import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMissingAuthorOrcid;
import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMissingOpenAccess;
import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMissingPid;
import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMissingPublicationDate;
import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMissingSubject;
import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMoreOpenAccess;
import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMorePid;
import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMoreSubject;
import eu.dnetlib.dhp.broker.oa.util.aggregators.simple.ResultGroup;
/**
 * Runs every registered {@link UpdateMatcher} against the records of a {@link ResultGroup} and
 * collects the updates that pass the datasource and topic filters into an {@link EventGroup}.
 */
public class EventFinder {

    private static final Logger log = LoggerFactory.getLogger(EventFinder.class);

    /** Matchers applied, in registration order, to each accepted (record, datasource) pair. */
    private static final List<UpdateMatcher<?>> matchers = new ArrayList<>();

    static {
        // Simple matchers
        matchers.add(new EnrichMissingAbstract());
        matchers.add(new EnrichMissingAuthorOrcid());
        matchers.add(new EnrichMissingOpenAccess());
        matchers.add(new EnrichMissingPid());
        matchers.add(new EnrichMissingPublicationDate());
        matchers.add(new EnrichMissingSubject());
        matchers.add(new EnrichMoreOpenAccess());
        matchers.add(new EnrichMorePid());
        matchers.add(new EnrichMoreSubject());

        // Advanced matchers
        matchers.add(new EnrichMissingProject());
        matchers.add(new EnrichMoreProject());
        matchers.add(new EnrichMissingSoftware());
        matchers.add(new EnrichMoreSoftware());
        matchers.add(new EnrichMissingPublicationIsRelatedTo());
        matchers.add(new EnrichMissingPublicationIsReferencedBy());
        matchers.add(new EnrichMissingPublicationReferences());
        matchers.add(new EnrichMissingPublicationIsSupplementedTo());
        matchers.add(new EnrichMissingPublicationIsSupplementedBy());
        matchers.add(new EnrichMissingDatasetIsRelatedTo());
        matchers.add(new EnrichMissingDatasetIsReferencedBy());
        matchers.add(new EnrichMissingDatasetReferences());
        matchers.add(new EnrichMissingDatasetIsSupplementedTo());
        matchers.add(new EnrichMissingDatasetIsSupplementedBy());
    }

    /**
     * Generates the events for a group of records.
     *
     * <p>Each record of the group is paired with each of its datasources; pairs whose datasource
     * is rejected by {@code verifyTarget} are skipped. Every registered matcher is then run on the
     * remaining pairs, and the updates whose topic path passes the topic filter are converted
     * into events.
     *
     * @param results         the group of records; each record is used as target and the whole
     *                        group is handed to the matchers
     * @param dsIdWhitelist   datasource ids that are always accepted ({@code null} is treated as empty)
     * @param dsIdBlacklist   datasource ids that are rejected unless whitelisted by id
     *                        ({@code null} is treated as empty)
     * @param dsTypeWhitelist datasource types accepted when the id is in neither id list
     *                        ({@code null} is treated as empty)
     * @param topicWhitelist  accepted topic paths; {@code null} or empty disables topic filtering
     * @param accumulators    passed through unchanged to each matcher
     * @return an event group holding one element per retained update
     */
    public static EventGroup generateEvents(final ResultGroup results,
        final Set<String> dsIdWhitelist,
        final Set<String> dsIdBlacklist,
        final Set<String> dsTypeWhitelist,
        final Set<String> topicWhitelist,
        final Map<String, LongAccumulator> accumulators) {

        final List<UpdateInfo<?>> list = new ArrayList<>();

        // Loop-invariant: with no topic whitelist every update is kept.
        final boolean acceptAnyTopic = topicWhitelist == null || topicWhitelist.isEmpty();

        for (final OaBrokerMainEntity target : results.getData()) {
            for (final OaBrokerRelatedDatasource targetDs : target.getDatasources()) {
                if (!verifyTarget(targetDs, dsIdWhitelist, dsIdBlacklist, dsTypeWhitelist)) {
                    continue;
                }
                for (final UpdateMatcher<?> matcher : matchers) {
                    for (final UpdateInfo<?> info : matcher
                        .searchUpdatesForRecord(target, targetDs, results.getData(), accumulators)) {
                        // The topic path is only read when a whitelist is actually in force.
                        if (acceptAnyTopic || topicWhitelist.contains(info.getTopic().getPath())) {
                            list.add(info);
                        }
                    }
                }
            }
        }

        return asEventGroup(list);
    }

    /**
     * Decides whether events may be generated for the given datasource.
     *
     * <p>Precedence: an id in {@code dsIdWhitelist} accepts; otherwise an id in
     * {@code dsIdBlacklist} rejects; otherwise the datasource is accepted only if its type is in
     * {@code dsTypeWhitelist}. A {@code null} set is treated as containing nothing.
     */
    private static boolean verifyTarget(final OaBrokerRelatedDatasource target,
        final Set<String> dsIdWhitelist,
        final Set<String> dsIdBlacklist,
        final Set<String> dsTypeWhitelist) {

        if (contains(dsIdWhitelist, target.getOpenaireId())) {
            return true;
        }
        if (contains(dsIdBlacklist, target.getOpenaireId())) {
            return false;
        }
        return contains(dsTypeWhitelist, target.getType());
    }

    /** Null-tolerant membership test: a {@code null} set contains nothing. */
    private static boolean contains(final Set<String> set, final Object value) {
        return set != null && set.contains(value);
    }

    /** Converts each update into a broker event and appends it, in list order, to a new group. */
    private static EventGroup asEventGroup(final List<UpdateInfo<?>> list) {
        final EventGroup events = new EventGroup();
        list.stream().map(EventFactory::newBrokerEvent).forEach(events::addElement);
        return events;
    }

    /**
     * Returns the registered matchers.
     *
     * <p>Note: this is the internal list itself, not a copy, so modifications made by the caller
     * affect subsequent calls to {@code generateEvents}.
     */
    public static List<UpdateMatcher<?>> getMatchers() {
        return matchers;
    }
}