2020-05-13 12:00:27 +02:00
|
|
|
|
2020-05-15 12:25:37 +02:00
|
|
|
package eu.dnetlib.dhp.broker.oa.matchers;
|
2020-05-13 12:00:27 +02:00
|
|
|
|
|
|
|
import java.util.Arrays;
|
|
|
|
import java.util.Collection;
|
|
|
|
import java.util.HashMap;
|
|
|
|
import java.util.List;
|
|
|
|
import java.util.Map;
|
2020-06-12 09:47:55 +02:00
|
|
|
import java.util.function.BiConsumer;
|
|
|
|
import java.util.function.Function;
|
2020-05-13 12:00:27 +02:00
|
|
|
|
|
|
|
import org.apache.commons.codec.digest.DigestUtils;
|
|
|
|
import org.apache.commons.lang3.StringUtils;
|
|
|
|
|
2020-06-22 08:51:31 +02:00
|
|
|
import eu.dnetlib.broker.objects.OaBrokerMainEntity;
|
2020-06-12 09:47:55 +02:00
|
|
|
import eu.dnetlib.dhp.broker.model.Topic;
|
2020-05-15 12:25:37 +02:00
|
|
|
import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
|
2020-06-09 16:01:31 +02:00
|
|
|
import eu.dnetlib.pace.config.DedupConfig;
|
2020-05-13 12:00:27 +02:00
|
|
|
|
2020-06-11 11:25:18 +02:00
|
|
|
public abstract class UpdateMatcher<T> {
|
2020-05-13 12:00:27 +02:00
|
|
|
|
|
|
|
private final boolean multipleUpdate;
|
2020-06-12 09:47:55 +02:00
|
|
|
private final Function<T, Topic> topicFunction;
|
2020-06-22 08:51:31 +02:00
|
|
|
private final BiConsumer<OaBrokerMainEntity, T> compileHighlightFunction;
|
2020-06-12 09:47:55 +02:00
|
|
|
private final Function<T, String> highlightToStringFunction;
|
2020-05-13 12:00:27 +02:00
|
|
|
|
2020-06-12 09:47:55 +02:00
|
|
|
public UpdateMatcher(final boolean multipleUpdate, final Function<T, Topic> topicFunction,
|
2020-06-22 08:51:31 +02:00
|
|
|
final BiConsumer<OaBrokerMainEntity, T> compileHighlightFunction,
|
2020-06-12 09:47:55 +02:00
|
|
|
final Function<T, String> highlightToStringFunction) {
|
2020-05-13 12:00:27 +02:00
|
|
|
this.multipleUpdate = multipleUpdate;
|
2020-06-12 09:47:55 +02:00
|
|
|
this.topicFunction = topicFunction;
|
|
|
|
this.compileHighlightFunction = compileHighlightFunction;
|
|
|
|
this.highlightToStringFunction = highlightToStringFunction;
|
2020-05-13 12:00:27 +02:00
|
|
|
}
|
|
|
|
|
2020-06-22 08:51:31 +02:00
|
|
|
public Collection<UpdateInfo<T>> searchUpdatesForRecord(final OaBrokerMainEntity res,
|
|
|
|
final Collection<OaBrokerMainEntity> others,
|
2020-06-10 12:11:16 +02:00
|
|
|
final DedupConfig dedupConfig) {
|
2020-05-13 12:00:27 +02:00
|
|
|
|
|
|
|
final Map<String, UpdateInfo<T>> infoMap = new HashMap<>();
|
|
|
|
|
2020-06-22 08:51:31 +02:00
|
|
|
for (final OaBrokerMainEntity source : others) {
|
2020-05-13 12:00:27 +02:00
|
|
|
if (source != res) {
|
2020-06-12 09:47:55 +02:00
|
|
|
for (final T hl : findDifferences(source, res)) {
|
|
|
|
final Topic topic = getTopicFunction().apply(hl);
|
2020-06-25 13:01:09 +02:00
|
|
|
if (topic != null) {
|
|
|
|
final UpdateInfo<T> info = new UpdateInfo<>(topic, hl, source, res,
|
|
|
|
getCompileHighlightFunction(),
|
|
|
|
getHighlightToStringFunction(), dedupConfig);
|
|
|
|
|
|
|
|
final String s = DigestUtils.md5Hex(info.getHighlightValueAsString());
|
|
|
|
if (!infoMap.containsKey(s) || infoMap.get(s).getTrust() < info.getTrust()) {
|
|
|
|
infoMap.put(s, info);
|
|
|
|
}
|
2020-05-13 12:00:27 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
final Collection<UpdateInfo<T>> values = infoMap.values();
|
|
|
|
|
|
|
|
if (values.isEmpty() || multipleUpdate) {
|
|
|
|
return values;
|
|
|
|
} else {
|
|
|
|
final UpdateInfo<T> v = values
|
|
|
|
.stream()
|
|
|
|
.sorted((o1, o2) -> Float.compare(o1.getTrust(), o2.getTrust()))
|
|
|
|
.findFirst()
|
|
|
|
.get();
|
|
|
|
return Arrays.asList(v);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-06-22 08:51:31 +02:00
|
|
|
protected abstract List<T> findDifferences(OaBrokerMainEntity source, OaBrokerMainEntity target);
|
2020-05-13 12:00:27 +02:00
|
|
|
|
2020-06-16 12:34:13 +02:00
|
|
|
protected static boolean isMissing(final List<String> list) {
|
|
|
|
return list == null || list.isEmpty() || StringUtils.isBlank(list.get(0));
|
2020-05-13 12:00:27 +02:00
|
|
|
}
|
|
|
|
|
2020-06-16 12:34:13 +02:00
|
|
|
protected boolean isMissing(final String field) {
|
|
|
|
return StringUtils.isBlank(field);
|
2020-05-19 16:17:35 +02:00
|
|
|
}
|
|
|
|
|
2020-06-12 09:47:55 +02:00
|
|
|
public boolean isMultipleUpdate() {
|
|
|
|
return multipleUpdate;
|
|
|
|
}
|
|
|
|
|
|
|
|
public Function<T, Topic> getTopicFunction() {
|
|
|
|
return topicFunction;
|
|
|
|
}
|
|
|
|
|
2020-06-22 08:51:31 +02:00
|
|
|
public BiConsumer<OaBrokerMainEntity, T> getCompileHighlightFunction() {
|
2020-06-12 09:47:55 +02:00
|
|
|
return compileHighlightFunction;
|
|
|
|
}
|
|
|
|
|
|
|
|
public Function<T, String> getHighlightToStringFunction() {
|
|
|
|
return highlightToStringFunction;
|
|
|
|
}
|
|
|
|
|
2020-05-13 12:00:27 +02:00
|
|
|
}
|