From ba4b4c74d86850db063d0b02948220747f1e20d9 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 15 Apr 2021 16:48:26 +0200 Subject: [PATCH] do not make the identifier prefix depend on the Handle --- .../schema/oaf/utils/IdentifierFactory.java | 22 +++++------ .../oaf/utils/IdentifierFactoryTest.java | 3 ++ .../schema/oaf/utils/publication_doi4.json | 37 +++++++++++++++++++ 3 files changed, 51 insertions(+), 11 deletions(-) create mode 100644 dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_doi4.json diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactory.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactory.java index fe4642ee97..a0532da268 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactory.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactory.java @@ -57,7 +57,7 @@ public class IdentifierFactory implements Serializable { } public static List getPids(List pid, KeyValue collectedFrom) { - return pidFromInstance(pid, collectedFrom).distinct().collect(Collectors.toList()); + return pidFromInstance(pid, collectedFrom, true).distinct().collect(Collectors.toList()); } public static String createDOIBoostIdentifier(T entity) { @@ -104,7 +104,7 @@ public class IdentifierFactory implements Serializable { checkArgument(StringUtils.isNoneBlank(entity.getId()), "missing entity identifier"); - final Map> pids = extractPids(entity); + final Map> pids = extractPids(entity); return pids .values() @@ -125,7 +125,7 @@ public class IdentifierFactory implements Serializable { .orElseGet(entity::getId); } - private static Map> extractPids(T entity) { + private static Map> extractPids(T entity) { if (entity instanceof Result) { return Optional .ofNullable(((Result) entity).getInstance()) @@ -142,23 +142,23 @@ public class IdentifierFactory implements Serializable { Collectors .groupingBy( p -> p.getQualifier().getClassid(), - Collectors.mapping(p -> p, Collectors.toList()))); + Collectors.mapping(p -> p, Collectors.toCollection(HashSet::new)))); } } - private static Map> mapPids(List instance) { + private static Map> mapPids(List instance) { return instance .stream() - .map(i -> pidFromInstance(i.getPid(), i.getCollectedfrom())) + .map(i -> pidFromInstance(i.getPid(), i.getCollectedfrom(), false)) .flatMap(Function.identity()) .collect( Collectors .groupingBy( p -> p.getQualifier().getClassid(), - Collectors.mapping(p -> p, Collectors.toList()))); + Collectors.mapping(p -> p, Collectors.toCollection(HashSet::new)))); } - private static Stream pidFromInstance(List pid, KeyValue collectedFrom) { + private static Stream pidFromInstance(List pid, KeyValue collectedFrom, boolean mapHandles) { return Optional .ofNullable(pid) .map( @@ -167,16 +167,16 @@ public class IdentifierFactory implements Serializable { // filter away PIDs provided by a DS that is not considered an authority for the // given PID Type .filter(p -> { - return shouldFilterPid(collectedFrom, p); + return shouldFilterPid(collectedFrom, p, mapHandles); }) .map(CleaningFunctions::normalizePidValue) .filter(CleaningFunctions::pidFilter)) .orElse(Stream.empty()); } - private static boolean shouldFilterPid(KeyValue collectedFrom, StructuredProperty p) { + private static boolean shouldFilterPid(KeyValue collectedFrom, StructuredProperty p, boolean mapHandles) { final PidType pType = PidType.tryValueOf(p.getQualifier().getClassid()); - return pType.equals(PidType.handle) || Optional.ofNullable(collectedFrom).isPresent() && + return (mapHandles && pType.equals(PidType.handle)) || Optional.ofNullable(collectedFrom).isPresent() && Optional .ofNullable(PID_AUTHORITY.get(pType)) .map(authorities -> { diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactoryTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactoryTest.java index 31ef91a7a7..935b74b08f 100644 --- a/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactoryTest.java +++ b/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactoryTest.java @@ -31,6 +31,9 @@ public class IdentifierFactoryTest { verifyIdentifier( "publication_doi3.json", "50|pmc_________::94e4cb08c93f8733b48e2445d04002ac", true); + verifyIdentifier( + "publication_doi4.json", "50|od______2852::38861c44e6052a8d49f59a4c39ba5e66", true); + verifyIdentifier( "publication_pmc1.json", "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f", true); diff --git a/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_doi4.json b/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_doi4.json new file mode 100644 index 0000000000..ac99ca93a0 --- /dev/null +++ b/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_doi4.json @@ -0,0 +1,37 @@ +{ + "id": "50|od______2852::38861c44e6052a8d49f59a4c39ba5e66", + "instance": [ + { + "collectedfrom": { + "key": "10|openaire____::1234", + "value": "Zenodo" + }, + "pid": [ + { + "qualifier": {"classid": "doi"}, + "value": "10.1016/j.cmet.2010.03.013" + }, + { + "qualifier": {"classid": "handle"}, + "value": "11012/83840" + } + ] + }, + { + "collectedfrom": { + "key": "10|opendoar____::2852", + "value": "Digital library of Brno University of Technology" + }, + "pid": [ + { + "qualifier": {"classid": "pmc"}, + "value": "21459329" + }, + { + "qualifier": {"classid": "handle"}, + "value": "11012/83840" + } + ] + } + ] +} \ No newline at end of file