dnet-hadoop/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingSubject.java

60 lines
1.8 KiB
Java
Raw Normal View History

2020-05-15 12:25:37 +02:00
package eu.dnetlib.dhp.broker.oa.matchers.simple;
2020-05-15 12:25:37 +02:00
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
import org.apache.commons.lang3.tuple.Pair;
import eu.dnetlib.dhp.broker.model.Topic;
import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher;
2020-05-15 12:25:37 +02:00
import eu.dnetlib.dhp.broker.oa.util.ConversionUtils;
import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
2020-06-11 11:25:18 +02:00
import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.ResultWithRelations;
2020-05-15 12:25:37 +02:00
import eu.dnetlib.dhp.schema.oaf.Qualifier;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
2020-06-09 16:01:31 +02:00
import eu.dnetlib.pace.config.DedupConfig;
2020-05-15 12:25:37 +02:00
2020-06-11 11:25:18 +02:00
public class EnrichMissingSubject extends UpdateMatcher<Pair<String, String>> {
2020-05-15 12:25:37 +02:00
public EnrichMissingSubject() {
super(true);
}
@Override
2020-06-11 11:25:18 +02:00
protected List<UpdateInfo<Pair<String, String>>> findUpdates(final ResultWithRelations source,
final ResultWithRelations target,
2020-06-10 12:11:16 +02:00
final DedupConfig dedupConfig) {
2020-05-15 12:25:37 +02:00
final Set<String> existingTypes = target
2020-06-11 11:25:18 +02:00
.getResult()
2020-05-15 12:25:37 +02:00
.getSubject()
.stream()
.map(StructuredProperty::getQualifier)
.map(Qualifier::getClassid)
.collect(Collectors.toSet());
return source
2020-06-11 11:25:18 +02:00
.getResult()
2020-05-15 12:25:37 +02:00
.getPid()
.stream()
.filter(pid -> !existingTypes.contains(pid.getQualifier().getClassid()))
.map(ConversionUtils::oafSubjectToPair)
2020-06-09 16:01:31 +02:00
.map(i -> generateUpdateInfo(i, source, target, dedupConfig))
2020-05-15 12:25:37 +02:00
.collect(Collectors.toList());
}
public UpdateInfo<Pair<String, String>> generateUpdateInfo(final Pair<String, String> highlightValue,
2020-06-11 11:25:18 +02:00
final ResultWithRelations source,
final ResultWithRelations target,
2020-06-09 16:01:31 +02:00
final DedupConfig dedupConfig) {
2020-05-15 12:25:37 +02:00
return new UpdateInfo<>(
Topic.fromPath("ENRICH/MISSING/SUBJECT/" + highlightValue.getLeft()),
highlightValue, source, target,
(p, pair) -> p.getSubjects().add(pair.getRight()),
2020-06-09 16:01:31 +02:00
pair -> pair.getLeft() + "::" + pair.getRight(), dedupConfig);
2020-05-15 12:25:37 +02:00
}
}