package eu.dnetlib.dhp.broker.oa.matchers; import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.function.BiConsumer; import java.util.function.Function; import java.util.stream.Collectors; import org.apache.commons.codec.digest.DigestUtils; import org.apache.commons.lang3.StringUtils; import org.apache.spark.util.LongAccumulator; import eu.dnetlib.broker.objects.OaBrokerMainEntity; import eu.dnetlib.broker.objects.OaBrokerRelatedDatasource; import eu.dnetlib.dhp.broker.model.Topic; import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; public abstract class UpdateMatcher { private final int maxNumber; private final Function topicFunction; private final BiConsumer compileHighlightFunction; private final Function highlightToStringFunction; public UpdateMatcher(final int maxNumber, final Function topicFunction, final BiConsumer compileHighlightFunction, final Function highlightToStringFunction) { this.maxNumber = maxNumber; this.topicFunction = topicFunction; this.compileHighlightFunction = compileHighlightFunction; this.highlightToStringFunction = highlightToStringFunction; } public Collection> searchUpdatesForRecord(final OaBrokerMainEntity target, final OaBrokerRelatedDatasource targetDs, final Collection others, final Map accumulators) { final Map> infoMap = new HashMap<>(); for (final OaBrokerMainEntity source : others) { if (source != target) { for (final T hl : findDifferences(source, target)) { final Topic topic = getTopicFunction().apply(hl); if (topic != null) { final UpdateInfo info = new UpdateInfo<>(topic, hl, source, target, targetDs, getCompileHighlightFunction(), getHighlightToStringFunction()); final String s = DigestUtils.md5Hex(info.getHighlightValueAsString()); if (!infoMap.containsKey(s) || infoMap.get(s).getTrust() < info.getTrust()) { infoMap.put(s, info); } } } } } final List> values = infoMap .values() .stream() .sorted((o1, o2) -> Float.compare(o2.getTrust(), o1.getTrust())) // DESCENDING .collect(Collectors.toList()); if (values.isEmpty()) { return new ArrayList<>(); } else if (values.size() > maxNumber) { incrementAccumulator(accumulators, maxNumber); return values.subList(0, maxNumber); } else { incrementAccumulator(accumulators, values.size()); return values; } } protected abstract List findDifferences(OaBrokerMainEntity source, OaBrokerMainEntity target); protected static boolean isMissing(final List list) { return list == null || list.isEmpty() || StringUtils.isBlank(list.get(0)); } protected boolean isMissing(final String s) { return StringUtils.isBlank(s); } public int getMaxNumber() { return maxNumber; } public Function getTopicFunction() { return topicFunction; } public BiConsumer getCompileHighlightFunction() { return compileHighlightFunction; } public Function getHighlightToStringFunction() { return highlightToStringFunction; } public String accumulatorName() { return "event_matcher_" + getClass().getSimpleName().toLowerCase(); } public void incrementAccumulator(final Map accumulators, final long n) { if (accumulators != null && accumulators.containsKey(accumulatorName())) { accumulators.get(accumulatorName()).add(n); } } }