diff --git a/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java b/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java index ebab4c7..44b3755 100644 --- a/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java +++ b/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java @@ -16,6 +16,9 @@ public class ModelConstants { public static final String CROSSREF_ID = "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2"; public static final String DATACITE_ID = "10|openaire____::9e3be59865b2c1c335d32dae2fe7b254"; + public static final String ZENODO_OD_ID = "10|opendoar____::358aee4cc897452c00244351e4d91f69"; + public static final String ZENODO_R3_ID = "10|re3data_____::7b0ad08687b2c960d5aeef06f811d5e6"; + public static final String EUROPE_PUBMED_CENTRAL_ID = "10|opendoar____::8b6dd7db9af49e67306feb59a8bdc52c"; public static final String PUBMED_CENTRAL_ID = "10|opendoar____::eda80a3d5b344bc40f3bc04f65b7a357"; public static final String ARXIV_ID = "10|opendoar____::6f4922f45568161a8cdf4ad2299f6d23"; diff --git a/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactory.java b/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactory.java index d0baec5..2b7a5f3 100644 --- a/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactory.java +++ b/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactory.java @@ -40,6 +40,8 @@ public class IdentifierFactory implements Serializable { PID_AUTHORITY.put(PidType.doi, HashBiMap.create()); PID_AUTHORITY.get(PidType.doi).put(CROSSREF_ID, "Crossref"); PID_AUTHORITY.get(PidType.doi).put(DATACITE_ID, "Datacite"); + PID_AUTHORITY.get(PidType.doi).put(ZENODO_OD_ID, "ZENODO"); + PID_AUTHORITY.get(PidType.doi).put(ZENODO_R3_ID, "Zenodo"); PID_AUTHORITY.put(PidType.pmc, HashBiMap.create()); PID_AUTHORITY.get(PidType.pmc).put(EUROPE_PUBMED_CENTRAL_ID, "Europe PubMed Central"); @@ -53,6 +55,18 @@ public class IdentifierFactory implements Serializable { 
PID_AUTHORITY.get(PidType.arXiv).put(ARXIV_ID, "arXiv.org e-Print Archive"); } + /** + * Declares the associations PID_TYPE -> [DATASOURCE ID, PID SUBSTRING] considered as delegated authority for that + * PID_TYPE. For example, Zenodo is delegated to forge DOIs that contain the word 'zenodo'. + */ + public static final Map<PidType, Map<String, String>> DELEGATED_PID_AUTHORITY = Maps.newHashMap(); + + static { + DELEGATED_PID_AUTHORITY.put(PidType.doi, new HashMap<>()); + DELEGATED_PID_AUTHORITY.get(PidType.doi).put(ZENODO_OD_ID, "zenodo"); + DELEGATED_PID_AUTHORITY.get(PidType.doi).put(ZENODO_R3_ID, "zenodo"); + } + public static List<StructuredProperty> getPids(List<StructuredProperty> pid, KeyValue collectedFrom) { return pidFromInstance(pid, collectedFrom, true).distinct().collect(Collectors.toList()); } @@ -164,24 +178,38 @@ public class IdentifierFactory implements Serializable { .stream() // filter away PIDs provided by a DS that is not considered an authority for the // given PID Type - .filter(p -> shouldFilterPid(collectedFrom, p, mapHandles)) + .filter(p -> shouldFilterPidByAuthority(collectedFrom, p, mapHandles)) .map(CleaningFunctions::normalizePidValue) + .filter(p -> isNotFromDelegatedAuthority(collectedFrom, p)) .filter(CleaningFunctions::pidFilter)) .orElse(Stream.empty()); } - private static boolean shouldFilterPid(KeyValue collectedFrom, StructuredProperty p, boolean mapHandles) { + private static boolean shouldFilterPidByAuthority(KeyValue collectedFrom, StructuredProperty p, boolean mapHandles) { final PidType pType = PidType.tryValueOf(p.getQualifier().getClassid()); return (mapHandles && pType.equals(PidType.handle)) || Optional.ofNullable(collectedFrom).isPresent() && Optional .ofNullable(PID_AUTHORITY.get(pType)) .map(authorities -> { return authorities.containsKey(collectedFrom.getKey()) - || authorities.containsValue(collectedFrom.getValue()); + || authorities.containsValue(collectedFrom.getValue()); }) .orElse(false); } + private static boolean isNotFromDelegatedAuthority(KeyValue collectedFrom, 
StructuredProperty p) { + final PidType pType = PidType.tryValueOf(p.getQualifier().getClassid()); + + final Map<String, String> da = DELEGATED_PID_AUTHORITY.get(pType); + if (Objects.isNull(da)) { + return true; + } + if (!da.containsKey(collectedFrom.getKey())) { + return true; + } + return StringUtils.contains(p.getValue(), da.get(collectedFrom.getKey())); + } + /** * @see {@link IdentifierFactory#createIdentifier(OafEntity, boolean)} */ diff --git a/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactoryTest.java b/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactoryTest.java index 9ef50dd..aec0878 100644 --- a/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactoryTest.java +++ b/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactoryTest.java @@ -34,6 +34,9 @@ public class IdentifierFactoryTest { verifyIdentifier( "publication_doi4.json", "50|od______2852::38861c44e6052a8d49f59a4c39ba5e66", true); + verifyIdentifier( + "publication_doi5.json", "50|doi_________::3bef95c0ca26dd55451fc8839ea69d27", true); + verifyIdentifier( "publication_pmc1.json", "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f", true); diff --git a/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_doi3.json b/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_doi3.json index 97c40d4..b1ea01f 100644 --- a/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_doi3.json +++ b/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_doi3.json @@ -3,7 +3,7 @@ "instance": [ { "collectedfrom": { - "key": "10|openaire____::1234", + "key": "10|opendoar____::358aee4cc897452c00244351e4d91f69", "value": "Zenodo" }, "pid": [ diff --git a/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_doi4.json b/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_doi4.json index ac99ca9..764c510 100644 --- a/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_doi4.json +++ 
b/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_doi4.json @@ -3,7 +3,7 @@ "instance": [ { "collectedfrom": { - "key": "10|openaire____::1234", + "key": "10|opendoar____::358aee4cc897452c00244351e4d91f69", "value": "Zenodo" }, "pid": [ diff --git a/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_doi5.json b/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_doi5.json new file mode 100644 index 0000000..816f0dc --- /dev/null +++ b/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_doi5.json @@ -0,0 +1,37 @@ +{ + "id": "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f", + "instance": [ + { + "collectedfrom": { + "key": "10|opendoar____::358aee4cc897452c00244351e4d91f69", + "value": "Zenodo" + }, + "pid": [ + { + "qualifier": {"classid": "doi"}, + "value": "10.5281/zenodo.5121485" + } + ] + }, + { + "collectedfrom": { + "key": "10|opendoar____::8b6dd7db9af49e67306feb59a8bdc52c", + "value": "Europe PubMed Central" + }, + "pid": [ + { + "qualifier": {"classid": "urn"}, + "value": "urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2" + }, + { + "qualifier": {"classid": "scp-number"}, + "value": "79953761260" + }, + { + "qualifier": {"classid": "pmc"}, + "value": "21459329" + } + ] + } + ] +} \ No newline at end of file diff --git a/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_pmc1.json b/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_pmc1.json index 3e4ba22..537719f 100644 --- a/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_pmc1.json +++ b/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_pmc1.json @@ -1 +1,17 @@ -{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f","pid":[{"qualifier":{"classid":"urn"},"value":"urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"},{"qualifier":{"classid":"scp-number"},"value":"79953761260"},{"qualifier":{"classid":"pmc"},"value":"21459329"}]} \ No newline at end of file +{ + "id": 
"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f", + "pid": [ + { + "qualifier": {"classid": "urn"}, + "value": "urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2" + }, + { + "qualifier": {"classid": "scp-number"}, + "value": "79953761260" + }, + { + "qualifier": {"classid": "pmc"}, + "value": "21459329" + } + ] +} \ No newline at end of file