dnet-hadoop/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMoreSubject.java

49 lines
1.3 KiB
Java
Raw Normal View History

2020-05-08 16:49:47 +02:00
package eu.dnetlib.dhp.broker.oa.matchers.simple;
2020-07-01 08:42:39 +02:00
import java.util.ArrayList;
import java.util.List;
2020-05-15 12:25:37 +02:00
import java.util.Set;
import java.util.stream.Collectors;
2020-06-22 08:51:31 +02:00
import eu.dnetlib.broker.objects.OaBrokerMainEntity;
import eu.dnetlib.broker.objects.OaBrokerTypedValue;
2020-05-13 12:00:27 +02:00
import eu.dnetlib.dhp.broker.model.Topic;
import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher;
2020-07-01 08:42:39 +02:00
import eu.dnetlib.dhp.broker.oa.util.BrokerConstants;
2020-06-22 08:51:31 +02:00
public class EnrichMoreSubject extends UpdateMatcher<OaBrokerTypedValue> {
2020-05-13 12:00:27 +02:00
public EnrichMoreSubject() {
2020-06-26 11:20:45 +02:00
super(20,
2020-06-16 12:34:13 +02:00
s -> Topic.fromPath("ENRICH/MORE/SUBJECT/" + s.getType()),
(p, s) -> p.getSubjects().add(s),
s -> subjectAsString(s));
2020-05-13 12:00:27 +02:00
}
2020-05-13 12:00:27 +02:00
@Override
2020-06-22 08:51:31 +02:00
protected List<OaBrokerTypedValue> findDifferences(final OaBrokerMainEntity source,
final OaBrokerMainEntity target) {
2020-06-25 13:01:09 +02:00
2020-07-01 08:42:39 +02:00
if (target.getSubjects().size() >= BrokerConstants.MAX_LIST_SIZE) {
return new ArrayList<>();
}
2020-05-15 12:25:37 +02:00
final Set<String> existingSubjects = target
2020-06-16 12:34:13 +02:00
.getSubjects()
2020-05-15 12:25:37 +02:00
.stream()
2020-06-16 12:34:13 +02:00
.map(pid -> subjectAsString(pid))
2020-05-15 12:25:37 +02:00
.collect(Collectors.toSet());
return source
2020-06-16 12:34:13 +02:00
.getPids()
2020-05-15 12:25:37 +02:00
.stream()
2020-06-16 12:34:13 +02:00
.filter(s -> !existingSubjects.contains(subjectAsString(s)))
2020-05-15 12:25:37 +02:00
.collect(Collectors.toList());
}
2020-06-22 08:51:31 +02:00
private static String subjectAsString(final OaBrokerTypedValue s) {
2020-06-16 12:34:13 +02:00
return s.getType() + "::" + s.getValue();
}
}