2020-05-08 16:49:47 +02:00
|
|
|
|
2020-05-07 12:31:26 +02:00
|
|
|
package eu.dnetlib.dhp.broker.oa.util;
|
|
|
|
|
2020-06-05 11:43:00 +02:00
|
|
|
import java.util.List;
|
2020-05-13 12:00:27 +02:00
|
|
|
import java.util.function.BiConsumer;
|
|
|
|
import java.util.function.Function;
|
|
|
|
|
2020-06-09 16:01:31 +02:00
|
|
|
import org.slf4j.Logger;
|
|
|
|
import org.slf4j.LoggerFactory;
|
|
|
|
|
|
|
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
|
|
|
|
2020-05-07 12:31:26 +02:00
|
|
|
import eu.dnetlib.broker.objects.OpenAireEventPayload;
|
2020-06-05 11:43:00 +02:00
|
|
|
import eu.dnetlib.broker.objects.Provenance;
|
2020-05-13 12:00:27 +02:00
|
|
|
import eu.dnetlib.broker.objects.Publication;
|
|
|
|
import eu.dnetlib.dhp.broker.model.Topic;
|
2020-06-05 11:43:00 +02:00
|
|
|
import eu.dnetlib.dhp.schema.oaf.Instance;
|
|
|
|
import eu.dnetlib.dhp.schema.oaf.KeyValue;
|
2020-05-13 12:00:27 +02:00
|
|
|
import eu.dnetlib.dhp.schema.oaf.Result;
|
2020-06-09 16:01:31 +02:00
|
|
|
import eu.dnetlib.pace.config.DedupConfig;
|
|
|
|
import eu.dnetlib.pace.model.MapDocument;
|
|
|
|
import eu.dnetlib.pace.tree.support.TreeProcessor;
|
|
|
|
import eu.dnetlib.pace.util.MapDocumentUtil;
|
2020-05-07 12:31:26 +02:00
|
|
|
|
2020-05-13 12:00:27 +02:00
|
|
|
public final class UpdateInfo<T> {
|
2020-05-07 12:31:26 +02:00
|
|
|
|
2020-05-13 12:00:27 +02:00
|
|
|
private final Topic topic;
|
2020-05-07 12:31:26 +02:00
|
|
|
|
|
|
|
private final T highlightValue;
|
|
|
|
|
2020-05-13 12:00:27 +02:00
|
|
|
private final Result source;
|
|
|
|
|
|
|
|
private final Result target;
|
|
|
|
|
|
|
|
private final BiConsumer<Publication, T> compileHighlight;
|
|
|
|
|
|
|
|
private final Function<T, String> highlightToString;
|
|
|
|
|
2020-05-07 12:31:26 +02:00
|
|
|
private final float trust;
|
|
|
|
|
2020-06-09 16:01:31 +02:00
|
|
|
private static final Logger log = LoggerFactory.getLogger(UpdateInfo.class);
|
|
|
|
|
2020-05-15 12:25:37 +02:00
|
|
|
public UpdateInfo(final Topic topic, final T highlightValue, final Result source, final Result target,
|
2020-05-13 12:00:27 +02:00
|
|
|
final BiConsumer<Publication, T> compileHighlight,
|
2020-06-09 16:01:31 +02:00
|
|
|
final Function<T, String> highlightToString,
|
|
|
|
final DedupConfig dedupConfig) {
|
2020-05-07 12:31:26 +02:00
|
|
|
this.topic = topic;
|
|
|
|
this.highlightValue = highlightValue;
|
2020-05-13 12:00:27 +02:00
|
|
|
this.source = source;
|
|
|
|
this.target = target;
|
|
|
|
this.compileHighlight = compileHighlight;
|
|
|
|
this.highlightToString = highlightToString;
|
2020-06-09 16:01:31 +02:00
|
|
|
this.trust = calculateTrust(dedupConfig, source, target);
|
2020-05-07 12:31:26 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
public T getHighlightValue() {
|
|
|
|
return highlightValue;
|
|
|
|
}
|
|
|
|
|
2020-05-13 12:00:27 +02:00
|
|
|
public Result getSource() {
|
|
|
|
return source;
|
2020-05-07 12:31:26 +02:00
|
|
|
}
|
|
|
|
|
2020-05-13 12:00:27 +02:00
|
|
|
public Result getTarget() {
|
|
|
|
return target;
|
|
|
|
}
|
|
|
|
|
2020-06-09 16:01:31 +02:00
|
|
|
private float calculateTrust(final DedupConfig dedupConfig, final Result r1, final Result r2) {
|
|
|
|
try {
|
|
|
|
final ObjectMapper objectMapper = new ObjectMapper();
|
|
|
|
final MapDocument doc1 = MapDocumentUtil.asMapDocumentWithJPath(dedupConfig, objectMapper.writeValueAsString(r1));
|
|
|
|
final MapDocument doc2 = MapDocumentUtil.asMapDocumentWithJPath(dedupConfig, objectMapper.writeValueAsString(r2));
|
|
|
|
|
|
|
|
final double score = new TreeProcessor(dedupConfig).computeScore(doc1, doc2);
|
|
|
|
final double threshold = dedupConfig.getWf().getThreshold();
|
|
|
|
|
|
|
|
return TrustUtils.rescale(score, threshold);
|
|
|
|
} catch (final Exception e) {
|
|
|
|
log.error("Error computing score between results", e);
|
|
|
|
return BrokerConstants.MIN_TRUST;
|
|
|
|
}
|
2020-05-13 12:00:27 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
protected Topic getTopic() {
|
2020-05-07 12:31:26 +02:00
|
|
|
return topic;
|
|
|
|
}
|
|
|
|
|
2020-05-13 12:00:27 +02:00
|
|
|
public String getTopicPath() {
|
|
|
|
return topic.getPath();
|
|
|
|
}
|
2020-05-07 12:31:26 +02:00
|
|
|
|
2020-05-13 12:00:27 +02:00
|
|
|
public float getTrust() {
|
|
|
|
return trust;
|
|
|
|
}
|
|
|
|
|
|
|
|
public String getHighlightValueAsString() {
|
|
|
|
return highlightToString.apply(getHighlightValue());
|
|
|
|
}
|
2020-05-07 12:31:26 +02:00
|
|
|
|
2020-06-05 11:43:00 +02:00
|
|
|
public OpenAireEventPayload asBrokerPayload() {
|
|
|
|
|
|
|
|
final Publication p = ConversionUtils.oafResultToBrokerPublication(getSource());
|
|
|
|
compileHighlight.accept(p, getHighlightValue());
|
|
|
|
|
|
|
|
final Publication hl = new Publication();
|
|
|
|
compileHighlight.accept(hl, getHighlightValue());
|
|
|
|
|
|
|
|
final String provId = getSource().getOriginalId().stream().findFirst().orElse(null);
|
2020-06-08 08:32:22 +02:00
|
|
|
final String provRepo = getSource()
|
|
|
|
.getCollectedfrom()
|
|
|
|
.stream()
|
|
|
|
.map(KeyValue::getValue)
|
|
|
|
.findFirst()
|
|
|
|
.orElse(null);
|
|
|
|
final String provUrl = getSource()
|
|
|
|
.getInstance()
|
|
|
|
.stream()
|
|
|
|
.map(Instance::getUrl)
|
|
|
|
.flatMap(List::stream)
|
|
|
|
.findFirst()
|
2020-06-09 16:01:31 +02:00
|
|
|
.orElse(null);;
|
2020-06-05 11:43:00 +02:00
|
|
|
|
|
|
|
final Provenance provenance = new Provenance().setId(provId).setRepositoryName(provRepo).setUrl(provUrl);
|
|
|
|
|
|
|
|
return new OpenAireEventPayload()
|
|
|
|
.setPublication(p)
|
|
|
|
.setHighlight(hl)
|
|
|
|
.setTrust(trust)
|
|
|
|
.setProvenance(provenance);
|
|
|
|
}
|
|
|
|
|
2020-05-07 12:31:26 +02:00
|
|
|
}
|