From c6b5bb3f173343f3348500ab6db1f0766cb2daec Mon Sep 17 00:00:00 2001
From: "michele.artini"
Date: Thu, 11 Jun 2020 14:30:24 +0200
Subject: [PATCH] orcid events

---
 .../simple/EnrichMissingAuthorOrcid.java      | 49 ++++++++++++++-----
 1 file changed, 38 insertions(+), 11 deletions(-)

diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingAuthorOrcid.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingAuthorOrcid.java
index 1226aaf45e..14021480d7 100644
--- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingAuthorOrcid.java
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingAuthorOrcid.java
@@ -1,41 +1,68 @@
 package eu.dnetlib.dhp.broker.oa.matchers.simple;
 
-import java.util.Arrays;
+import java.util.ArrayList;
 import java.util.List;
-
-import org.apache.commons.lang3.tuple.Pair;
+import java.util.Set;
+import java.util.stream.Collectors;
 
 import eu.dnetlib.dhp.broker.model.Topic;
 import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher;
 import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
 import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.ResultWithRelations;
+import eu.dnetlib.dhp.schema.oaf.Author;
+import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
 import eu.dnetlib.pace.config.DedupConfig;
 
-public class EnrichMissingAuthorOrcid extends UpdateMatcher<Pair<String, String>> {
+public class EnrichMissingAuthorOrcid extends UpdateMatcher<String> {
 
 	public EnrichMissingAuthorOrcid() {
 		super(true);
 	}
 
 	@Override
-	protected List<UpdateInfo<Pair<String, String>>> findUpdates(final ResultWithRelations source,
+	protected List<UpdateInfo<String>> findUpdates(final ResultWithRelations source,
 		final ResultWithRelations target,
 		final DedupConfig dedupConfig) {
-		// TODO
-		// return Arrays.asList(new EnrichMissingAbstract("xxxxxxx", 0.9f));
-		return Arrays.asList();
+
+		final Set<String> existingOrcids = target
+			.getResult()
+			.getAuthor()
+			.stream()
+			.map(Author::getPid)
+			.flatMap(List::stream)
+			.filter(pid -> pid.getQualifier().getClassid().equalsIgnoreCase("orcid"))
+			.map(pid -> pid.getValue())
+			.collect(Collectors.toSet());
+
+		final List<UpdateInfo<String>> list = new ArrayList<>();
+
+		for (final Author author : source.getResult().getAuthor()) {
+			final String name = author.getFullname();
+
+			for (final StructuredProperty pid : author.getPid()) {
+				if (pid.getQualifier().getClassid().equalsIgnoreCase("orcid")
+					&& !existingOrcids.contains(pid.getValue())) {
+					list
+						.add(
+							generateUpdateInfo(name + " [ORCID: " + pid.getValue() + "]", source, target, dedupConfig));
+					;
+				}
+			}
+		}
+
+		return list;
 	}
 
-	public UpdateInfo<Pair<String, String>> generateUpdateInfo(final Pair<String, String> highlightValue,
+	public UpdateInfo<String> generateUpdateInfo(final String highlightValue,
 		final ResultWithRelations source,
 		final ResultWithRelations target,
 		final DedupConfig dedupConfig) {
 		return new UpdateInfo<>(
 			Topic.ENRICH_MISSING_AUTHOR_ORCID,
 			highlightValue, source, target,
-			(p, pair) -> p.getCreators().add(pair.getLeft() + " - ORCID: " + pair.getRight()),
-			pair -> pair.getLeft() + "::" + pair.getRight(),
+			(p, aut) -> p.getCreators().add(aut),
+			aut -> aut,
 			dedupConfig);
 	}
 
 }