From 5d93f5e69a398c3f45b6039da01fc367bf3c4f41 Mon Sep 17 00:00:00 2001 From: Alessia Bardi Date: Fri, 23 Sep 2022 12:03:58 +0200 Subject: [PATCH] ROHub as delegated authority for w3id --- .../dnetlib/dhp/schema/oaf/utils/IdentifierFactory.java | 8 +++++++- .../dhp/schema/oaf/utils/IdentifierFactoryTest.java | 8 +++++++- .../eu/dnetlib/dhp/schema/oaf/utils/orp-rohub.json | 1 + 3 files changed, 15 insertions(+), 2 deletions(-) create mode 100644 src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/orp-rohub.json diff --git a/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactory.java b/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactory.java index 6acb7b6..32e8e39 100644 --- a/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactory.java +++ b/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactory.java @@ -32,7 +32,8 @@ public class IdentifierFactory implements Serializable { public static final int ID_PREFIX_LEN = 12; /** - * Declares the associations PID_TYPE -> [DATASOURCE ID, NAME] considered authoritative for that PID_TYPE + * Declares the associations PID_TYPE -> [DATASOURCE ID, NAME] considered authoritative for that PID_TYPE. + * The id of the record (source_::id) will be rewritten as pidType_::id) */ public static final Map> PID_AUTHORITY = Maps.newHashMap(); @@ -61,6 +62,9 @@ public class IdentifierFactory implements Serializable { /** * Declares the associations PID_TYPE -> [DATASOURCE ID, PID SUBSTRING] considered as delegated authority for that * PID_TYPE. Example, Zenodo is delegated to forge DOIs that contain the 'zenodo' word. + * + * If a record with the same id (same pid) comes from 2 data sources, the one coming from a delegated source wins. E.g. Zenodo records win over those from Datacite. + * See also https://code-repo.d4science.org/D-Net/dnet-hadoop/pulls/187 and the class dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java */ public static final Map> DELEGATED_PID_AUTHORITY = Maps.newHashMap(); @@ -68,6 +72,8 @@ public class IdentifierFactory implements Serializable { DELEGATED_PID_AUTHORITY.put(PidType.doi, new HashMap<>()); DELEGATED_PID_AUTHORITY.get(PidType.doi).put(ZENODO_OD_ID, "zenodo"); DELEGATED_PID_AUTHORITY.get(PidType.doi).put(ZENODO_R3_ID, "zenodo"); + DELEGATED_PID_AUTHORITY.put(PidType.w3id, new HashMap<>()); + DELEGATED_PID_AUTHORITY.get(PidType.w3id).put(ROHUB_ID, "ro-id"); } /** diff --git a/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactoryTest.java b/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactoryTest.java index 983ae6c..e21f57e 100644 --- a/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactoryTest.java +++ b/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactoryTest.java @@ -68,12 +68,18 @@ class IdentifierFactoryTest { verifyIdentifier("publication_5.json", defaultID, false); } + @Test + void testCreateIdentifierForROHub() throws IOException { + verifyIdentifier( + "orp-rohub.json", "50|w3id________::afc7592914ae190a50570db90f55f9c2", true); + } + protected void verifyIdentifier(String filename, String expectedID, boolean md5) throws IOException { final String json = IOUtils.toString(getClass().getResourceAsStream(filename)); final Publication pub = OBJECT_MAPPER.readValue(json, Publication.class); String id = IdentifierFactory.createIdentifier(pub, md5); - + System.out.println(id); assertNotNull(id); assertEquals(expectedID, id); } diff --git a/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/orp-rohub.json b/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/orp-rohub.json new file mode 100644 index 0000000..c989394 --- /dev/null +++ b/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/orp-rohub.json @@ -0,0 +1 @@ +{"collectedfrom":[{"key":"10|fairsharing_::1b69ebedb522700034547abc5652ffac","value":"ROHub","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1663926081966,"id":"50|w3id________::afc7592914ae190a50570db90f55f9c2","originalId":["50|fsh_____4119::afc7592914ae190a50570db90f55f9c2","https://w3id.org/ro-id/0ab171a7-45c5-4194-82d4-850955504bca"],"pid":[{"value":"https://w3id.org/ro-id/0ab171a7-45c5-4194-82d4-850955504bca","qualifier":{"classid":"w3id","classname":"w3id.org","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"dateofcollection":"2019-03-27T15:15:22.22Z","dateoftransformation":"2019-04-17T16:04:20.586Z","extraInfo":[],"oaiprovenance":null,"processingchargeamount":null,"processingchargecurrency":null,"measures":null,"author":[{"fullname":"CNR-ISMAR","name":"","surname":"","rank":1,"pid":[],"affiliation":[]}],"resulttype":{"classid":"other","classname":"other","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:languages","schemename":"dnet:languages"},"country":[],"subject":[{"value":"Ecology","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"EOSC::RO-crate","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"title":[{"value":"Using biological effects tools to define Good Environmental Status under the European Union Marine Strategy Framework Directive","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"relevantdate":[{"value":"2018-06-20T11:21:46Z","qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"description":[{"value":"The use of biological effects tools offer enormous potential to meet the challenges outlined by the European Union Marine Strategy Framework Directive (MSFD) whereby Member States are required to develop a robust set of tools for defining 11 qualitative descriptors of Good Environmental Status (GES), such as demonstrating that \"Concentrations of contaminants are at levels not giving rise to pollution effects\" (GES Descriptor 8). This paper discusses the combined approach of monitoring chemical contaminant levels, along side biological effect measurements relating to the effect of pollutants, for undertaking assessments of GES across European marine regions. We outline the minimum standards that biological effects tools should meet if they are to be used for defining GES in relation to Descriptor 8 and describe the current international initiatives underway to develop assessment criteria for these biological effects techniques. Crown Copyright (C) 2010 Published by Elsevier Ltd. All rights reserved.","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"dateofacceptance":null,"publisher":{"value":"PoznaƄ Supercomputing and Networking Center","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"embargoenddate":null,"source":[],"fulltext":[],"format":[],"contributor":[{"value":"Generation Service","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"resourcetype":{"classid":"RO-crate","classname":"RO-crate","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"coverage":[],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"context":[],"externalReference":[],"instance":[{"license":null,"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"other research product","classname":"other research product","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"10|fairsharing_::1b69ebedb522700034547abc5652ffac","value":"ROHub","dataInfo":null},"url":null,"distributionlocation":null,"collectedfrom":{"key":"10|fairsharing_::1b69ebedb522700034547abc5652ffac","value":"ROHub","dataInfo":null},"pid":[{"value":"https://w3id.org/ro-id/0ab171a7-45c5-4194-82d4-850955504bca","qualifier":{"classid":"w3id","classname":"w3id.org","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"alternateIdentifier":[],"dateofacceptance":null,"processingchargeamount":null,"processingchargecurrency":null,"refereed":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"measures":null}],"eoscifguidelines":null,"contactperson":[],"contactgroup":[],"tool":[]}