dnet-hadoop/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingAuthorOrcid.java

69 lines
1.9 KiB
Java
Raw Normal View History

2020-05-08 16:49:47 +02:00
package eu.dnetlib.dhp.broker.oa.matchers.simple;
2020-06-11 14:30:24 +02:00
import java.util.ArrayList;
import java.util.List;
2020-06-11 14:30:24 +02:00
import java.util.Set;
import java.util.stream.Collectors;
2020-05-13 12:00:27 +02:00
import eu.dnetlib.dhp.broker.model.Topic;
import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher;
2020-05-15 12:25:37 +02:00
import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
2020-06-11 11:25:18 +02:00
import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.ResultWithRelations;
2020-06-11 14:30:24 +02:00
import eu.dnetlib.dhp.schema.oaf.Author;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
2020-06-09 16:01:31 +02:00
import eu.dnetlib.pace.config.DedupConfig;
2020-06-11 14:30:24 +02:00
public class EnrichMissingAuthorOrcid extends UpdateMatcher<String> {
2020-05-13 12:00:27 +02:00
public EnrichMissingAuthorOrcid() {
super(true);
}
@Override
2020-06-11 14:30:24 +02:00
protected List<UpdateInfo<String>> findUpdates(final ResultWithRelations source,
2020-06-11 11:25:18 +02:00
final ResultWithRelations target,
2020-06-10 12:11:16 +02:00
final DedupConfig dedupConfig) {
2020-06-11 14:30:24 +02:00
final Set<String> existingOrcids = target
.getResult()
.getAuthor()
.stream()
.map(Author::getPid)
.flatMap(List::stream)
.filter(pid -> pid.getQualifier().getClassid().equalsIgnoreCase("orcid"))
.map(pid -> pid.getValue())
.collect(Collectors.toSet());
final List<UpdateInfo<String>> list = new ArrayList<>();
for (final Author author : source.getResult().getAuthor()) {
final String name = author.getFullname();
for (final StructuredProperty pid : author.getPid()) {
if (pid.getQualifier().getClassid().equalsIgnoreCase("orcid")
&& !existingOrcids.contains(pid.getValue())) {
list
.add(
generateUpdateInfo(name + " [ORCID: " + pid.getValue() + "]", source, target, dedupConfig));
;
}
}
}
return list;
}
2020-06-11 14:30:24 +02:00
public UpdateInfo<String> generateUpdateInfo(final String highlightValue,
2020-06-11 11:25:18 +02:00
final ResultWithRelations source,
final ResultWithRelations target,
2020-06-09 16:01:31 +02:00
final DedupConfig dedupConfig) {
2020-05-13 12:00:27 +02:00
return new UpdateInfo<>(
Topic.ENRICH_MISSING_AUTHOR_ORCID,
highlightValue, source, target,
2020-06-11 14:30:24 +02:00
(p, aut) -> p.getCreators().add(aut),
aut -> aut,
2020-06-09 16:01:31 +02:00
dedupConfig);
}
}