From a41e0cb64899dc3673dd119e93a2550b4b8c70ec Mon Sep 17 00:00:00 2001 From: "michele.artini" Date: Thu, 11 Jun 2020 12:28:34 +0200 Subject: [PATCH 1/3] missing landingPage urls in instances --- .../eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java index e6a744fc0c..7ff483aff9 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java @@ -138,9 +138,16 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { for (final Object o : doc.selectNodes("//datacite:alternateIdentifier[@alternateIdentifierType='URL']")) { url.add(((Node) o).getText().trim()); } + for (final Object o : doc + .selectNodes("//datacite:alternateIdentifier[@alternateIdentifierType='landingPage']")) { + url.add(((Node) o).getText().trim()); + } for (final Object o : doc.selectNodes("//datacite:identifier[@identifierType='URL']")) { url.add(((Node) o).getText().trim()); } + for (final Object o : doc.selectNodes("//datacite:identifier[@identifierType='landingPage']")) { + url.add(((Node) o).getText().trim()); + } for (final Object o : doc.selectNodes("//datacite:alternateIdentifier[@alternateIdentifierType='DOI']")) { url.add(HTTP_DX_DOI_PREIFX + ((Node) o).getText().trim()); } @@ -379,11 +386,13 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { res .addAll( prepareListStructPropsWithValidQualifier( - doc, "//datacite:identifier[@identifierType != 'URL']", "@identifierType", DNET_PID_TYPES, info)); + doc, "//datacite:identifier[@identifierType != 'URL' and @identifierType != 'landingPage']", + "@identifierType", DNET_PID_TYPES, info)); res .addAll( prepareListStructPropsWithValidQualifier( - doc, "//datacite:alternateIdentifier[@alternateIdentifierType != 'URL']", + doc, + "//datacite:alternateIdentifier[@alternateIdentifierType != 'URL' and @alternateIdentifierType != 'landingPage']", "@alternateIdentifierType", DNET_PID_TYPES, info)); return res; } From 48959e9a1710f627a902dc986e07e180193edb9e Mon Sep 17 00:00:00 2001 From: "michele.artini" Date: Thu, 11 Jun 2020 14:24:02 +0200 Subject: [PATCH 2/3] orcid events --- .../simple/EnrichMissingAuthorOrcid.java | 49 ++++++++++++++----- 1 file changed, 38 insertions(+), 11 deletions(-) diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingAuthorOrcid.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingAuthorOrcid.java index 1226aaf45e..14021480d7 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingAuthorOrcid.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingAuthorOrcid.java @@ -1,41 +1,68 @@ package eu.dnetlib.dhp.broker.oa.matchers.simple; -import java.util.Arrays; +import java.util.ArrayList; import java.util.List; - -import org.apache.commons.lang3.tuple.Pair; +import java.util.Set; +import java.util.stream.Collectors; import eu.dnetlib.dhp.broker.model.Topic; import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher; import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.ResultWithRelations; +import eu.dnetlib.dhp.schema.oaf.Author; +import eu.dnetlib.dhp.schema.oaf.StructuredProperty; import eu.dnetlib.pace.config.DedupConfig; -public class EnrichMissingAuthorOrcid extends UpdateMatcher> { +public class EnrichMissingAuthorOrcid extends UpdateMatcher { public EnrichMissingAuthorOrcid() { super(true); } @Override - protected List>> findUpdates(final ResultWithRelations source, + protected List> findUpdates(final ResultWithRelations source, final ResultWithRelations target, final DedupConfig dedupConfig) { - // TODO - // return Arrays.asList(new EnrichMissingAbstract("xxxxxxx", 0.9f)); - return Arrays.asList(); + + final Set existingOrcids = target + .getResult() + .getAuthor() + .stream() + .map(Author::getPid) + .flatMap(List::stream) + .filter(pid -> pid.getQualifier().getClassid().equalsIgnoreCase("orcid")) + .map(pid -> pid.getValue()) + .collect(Collectors.toSet()); + + final List> list = new ArrayList<>(); + + for (final Author author : source.getResult().getAuthor()) { + final String name = author.getFullname(); + + for (final StructuredProperty pid : author.getPid()) { + if (pid.getQualifier().getClassid().equalsIgnoreCase("orcid") + && !existingOrcids.contains(pid.getValue())) { + list + .add( + generateUpdateInfo(name + " [ORCID: " + pid.getValue() + "]", source, target, dedupConfig)); + ; + } + } + } + + return list; } - public UpdateInfo> generateUpdateInfo(final Pair highlightValue, + public UpdateInfo generateUpdateInfo(final String highlightValue, final ResultWithRelations source, final ResultWithRelations target, final DedupConfig dedupConfig) { return new UpdateInfo<>( Topic.ENRICH_MISSING_AUTHOR_ORCID, highlightValue, source, target, - (p, pair) -> p.getCreators().add(pair.getLeft() + " - ORCID: " + pair.getRight()), - pair -> pair.getLeft() + "::" + pair.getRight(), + (p, aut) -> p.getCreators().add(aut), + aut -> aut, dedupConfig); } } From c2e1b66e83b6bda2cdf863a1c10e6e02411baea1 Mon Sep 17 00:00:00 2001 From: "michele.artini" Date: Thu, 11 Jun 2020 14:28:03 +0200 Subject: [PATCH 3/3] Revert "orcid events" This reverts commit 48959e9a1710f627a902dc986e07e180193edb9e. --- .../simple/EnrichMissingAuthorOrcid.java | 49 +++++-------------- 1 file changed, 11 insertions(+), 38 deletions(-) diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingAuthorOrcid.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingAuthorOrcid.java index 14021480d7..1226aaf45e 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingAuthorOrcid.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingAuthorOrcid.java @@ -1,68 +1,41 @@ package eu.dnetlib.dhp.broker.oa.matchers.simple; -import java.util.ArrayList; +import java.util.Arrays; import java.util.List; -import java.util.Set; -import java.util.stream.Collectors; + +import org.apache.commons.lang3.tuple.Pair; import eu.dnetlib.dhp.broker.model.Topic; import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher; import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.ResultWithRelations; -import eu.dnetlib.dhp.schema.oaf.Author; -import eu.dnetlib.dhp.schema.oaf.StructuredProperty; import eu.dnetlib.pace.config.DedupConfig; -public class EnrichMissingAuthorOrcid extends UpdateMatcher { +public class EnrichMissingAuthorOrcid extends UpdateMatcher> { public EnrichMissingAuthorOrcid() { super(true); } @Override - protected List> findUpdates(final ResultWithRelations source, + protected List>> findUpdates(final ResultWithRelations source, final ResultWithRelations target, final DedupConfig dedupConfig) { - - final Set existingOrcids = target - .getResult() - .getAuthor() - .stream() - .map(Author::getPid) - .flatMap(List::stream) - .filter(pid -> pid.getQualifier().getClassid().equalsIgnoreCase("orcid")) - .map(pid -> pid.getValue()) - .collect(Collectors.toSet()); - - final List> list = new ArrayList<>(); - - for (final Author author : source.getResult().getAuthor()) { - final String name = author.getFullname(); - - for (final StructuredProperty pid : author.getPid()) { - if (pid.getQualifier().getClassid().equalsIgnoreCase("orcid") - && !existingOrcids.contains(pid.getValue())) { - list - .add( - generateUpdateInfo(name + " [ORCID: " + pid.getValue() + "]", source, target, dedupConfig)); - ; - } - } - } - - return list; + // TODO + // return Arrays.asList(new EnrichMissingAbstract("xxxxxxx", 0.9f)); + return Arrays.asList(); } - public UpdateInfo generateUpdateInfo(final String highlightValue, + public UpdateInfo> generateUpdateInfo(final Pair highlightValue, final ResultWithRelations source, final ResultWithRelations target, final DedupConfig dedupConfig) { return new UpdateInfo<>( Topic.ENRICH_MISSING_AUTHOR_ORCID, highlightValue, source, target, - (p, aut) -> p.getCreators().add(aut), - aut -> aut, + (p, pair) -> p.getCreators().add(pair.getLeft() + " - ORCID: " + pair.getRight()), + pair -> pair.getLeft() + "::" + pair.getRight(), dedupConfig); } }