dnet-hadoop/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMorePid.java

48 lines
1.5 KiB
Java
Raw Normal View History

2020-05-08 16:49:47 +02:00
package eu.dnetlib.dhp.broker.oa.matchers.simple;
import java.util.List;
2020-05-15 12:25:37 +02:00
import java.util.Set;
import java.util.stream.Collectors;
import eu.dnetlib.broker.objects.Pid;
2020-05-13 12:00:27 +02:00
import eu.dnetlib.dhp.broker.model.Topic;
import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher;
2020-05-15 12:25:37 +02:00
import eu.dnetlib.dhp.broker.oa.util.ConversionUtils;
import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
import eu.dnetlib.dhp.schema.oaf.Result;
2020-06-09 16:01:31 +02:00
import eu.dnetlib.pace.config.DedupConfig;
public class EnrichMorePid extends UpdateMatcher<Result, Pid> {
2020-05-13 12:00:27 +02:00
public EnrichMorePid() {
super(true);
}
@Override
2020-06-09 16:01:31 +02:00
protected List<UpdateInfo<Pid>> findUpdates(final Result source, final Result target, final DedupConfig dedupConfig) {
2020-05-15 12:25:37 +02:00
final Set<String> existingPids = target
.getPid()
.stream()
.map(pid -> pid.getQualifier().getClassid() + "::" + pid.getValue())
.collect(Collectors.toSet());
return source
.getPid()
.stream()
.filter(pid -> !existingPids.contains(pid.getQualifier().getClassid() + "::" + pid.getValue()))
.map(ConversionUtils::oafPidToBrokerPid)
2020-06-09 16:01:31 +02:00
.map(i -> generateUpdateInfo(i, source, target, dedupConfig))
2020-05-15 12:25:37 +02:00
.collect(Collectors.toList());
}
2020-06-09 16:01:31 +02:00
public UpdateInfo<Pid> generateUpdateInfo(final Pid highlightValue, final Result source, final Result target, final DedupConfig dedupConfig) {
2020-05-13 12:00:27 +02:00
return new UpdateInfo<>(
Topic.ENRICH_MORE_PID,
highlightValue, source, target,
(p, pid) -> p.getPids().add(pid),
2020-06-09 16:01:31 +02:00
pid -> pid.getType() + "::" + pid.getValue(), dedupConfig);
}
}