Merge branch 'master' into datasource_model_eosc

This commit is contained in:
Claudio Atzori 2021-07-29 17:27:30 +02:00
commit 00693d6f9b
7 changed files with 102 additions and 9 deletions

View File

@ -1,7 +1,10 @@
package eu.dnetlib.dhp.schema.common; package eu.dnetlib.dhp.schema.common;
import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.AccessRight;
import eu.dnetlib.dhp.schema.oaf.DataInfo;
import eu.dnetlib.dhp.schema.oaf.KeyValue;
import eu.dnetlib.dhp.schema.oaf.Qualifier;
public class ModelConstants { public class ModelConstants {
@ -13,6 +16,9 @@ public class ModelConstants {
public static final String CROSSREF_ID = "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2"; public static final String CROSSREF_ID = "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2";
public static final String DATACITE_ID = "10|openaire____::9e3be59865b2c1c335d32dae2fe7b254"; public static final String DATACITE_ID = "10|openaire____::9e3be59865b2c1c335d32dae2fe7b254";
public static final String ZENODO_OD_ID = "10|opendoar____::358aee4cc897452c00244351e4d91f69";
public static final String ZENODO_R3_ID = "10|re3data_____::7b0ad08687b2c960d5aeef06f811d5e6";
public static final String EUROPE_PUBMED_CENTRAL_ID = "10|opendoar____::8b6dd7db9af49e67306feb59a8bdc52c"; public static final String EUROPE_PUBMED_CENTRAL_ID = "10|opendoar____::8b6dd7db9af49e67306feb59a8bdc52c";
public static final String PUBMED_CENTRAL_ID = "10|opendoar____::eda80a3d5b344bc40f3bc04f65b7a357"; public static final String PUBMED_CENTRAL_ID = "10|opendoar____::eda80a3d5b344bc40f3bc04f65b7a357";
public static final String ARXIV_ID = "10|opendoar____::6f4922f45568161a8cdf4ad2299f6d23"; public static final String ARXIV_ID = "10|opendoar____::6f4922f45568161a8cdf4ad2299f6d23";
@ -21,6 +27,9 @@ public class ModelConstants {
// VOCABULARY VALUE // VOCABULARY VALUE
public static final String ACCESS_RIGHT_OPEN = "OPEN"; public static final String ACCESS_RIGHT_OPEN = "OPEN";
public static final String ACCESS_RIGHT_EMBARGO = "EMBARGO";
public static final String ACCESS_RIGHT_CLOSED = "CLOSED";
public static final String DNET_SUBJECT_TYPOLOGIES = "dnet:subject_classification_typologies"; public static final String DNET_SUBJECT_TYPOLOGIES = "dnet:subject_classification_typologies";
public static final String DNET_RESULT_TYPOLOGIES = "dnet:result_typologies"; public static final String DNET_RESULT_TYPOLOGIES = "dnet:result_typologies";
@ -194,8 +203,8 @@ public class ModelConstants {
return q; return q;
} }
private static KeyValue keyValue(String key, String value) { private static KeyValue keyValue(final String key, final String value) {
KeyValue kv = new KeyValue(); final KeyValue kv = new KeyValue();
kv.setKey(key); kv.setKey(key);
kv.setValue(value); kv.setValue(value);
kv.setDataInfo(new DataInfo()); kv.setDataInfo(new DataInfo());

View File

@ -40,6 +40,8 @@ public class IdentifierFactory implements Serializable {
PID_AUTHORITY.put(PidType.doi, HashBiMap.create()); PID_AUTHORITY.put(PidType.doi, HashBiMap.create());
PID_AUTHORITY.get(PidType.doi).put(CROSSREF_ID, "Crossref"); PID_AUTHORITY.get(PidType.doi).put(CROSSREF_ID, "Crossref");
PID_AUTHORITY.get(PidType.doi).put(DATACITE_ID, "Datacite"); PID_AUTHORITY.get(PidType.doi).put(DATACITE_ID, "Datacite");
PID_AUTHORITY.get(PidType.doi).put(ZENODO_OD_ID, "ZENODO");
PID_AUTHORITY.get(PidType.doi).put(ZENODO_R3_ID, "Zenodo");
PID_AUTHORITY.put(PidType.pmc, HashBiMap.create()); PID_AUTHORITY.put(PidType.pmc, HashBiMap.create());
PID_AUTHORITY.get(PidType.pmc).put(EUROPE_PUBMED_CENTRAL_ID, "Europe PubMed Central"); PID_AUTHORITY.get(PidType.pmc).put(EUROPE_PUBMED_CENTRAL_ID, "Europe PubMed Central");
@ -53,6 +55,18 @@ public class IdentifierFactory implements Serializable {
PID_AUTHORITY.get(PidType.arXiv).put(ARXIV_ID, "arXiv.org e-Print Archive"); PID_AUTHORITY.get(PidType.arXiv).put(ARXIV_ID, "arXiv.org e-Print Archive");
} }
/**
* Declares the associations PID_TYPE -> [DATASOURCE ID, PID SUBSTRING] considered as delegated authority for that
* PID_TYPE. Example, Zenodo is delegated to forge DOIs that contain the 'zenodo' word.
*/
public static final Map<PidType, Map<String, String>> DELEGATED_PID_AUTHORITY = Maps.newHashMap();
static {
DELEGATED_PID_AUTHORITY.put(PidType.doi, new HashMap<>());
DELEGATED_PID_AUTHORITY.get(PidType.doi).put(ZENODO_OD_ID, "zenodo");
DELEGATED_PID_AUTHORITY.get(PidType.doi).put(ZENODO_R3_ID, "zenodo");
}
public static List<StructuredProperty> getPids(List<StructuredProperty> pid, KeyValue collectedFrom) { public static List<StructuredProperty> getPids(List<StructuredProperty> pid, KeyValue collectedFrom) {
return pidFromInstance(pid, collectedFrom, true).distinct().collect(Collectors.toList()); return pidFromInstance(pid, collectedFrom, true).distinct().collect(Collectors.toList());
} }
@ -164,24 +178,38 @@ public class IdentifierFactory implements Serializable {
.stream() .stream()
// filter away PIDs provided by a DS that is not considered an authority for the // filter away PIDs provided by a DS that is not considered an authority for the
// given PID Type // given PID Type
.filter(p -> shouldFilterPid(collectedFrom, p, mapHandles)) .filter(p -> shouldFilterPidByAuthority(collectedFrom, p, mapHandles))
.map(CleaningFunctions::normalizePidValue) .map(CleaningFunctions::normalizePidValue)
.filter(p -> isNotFromDelegatedAuthority(collectedFrom, p))
.filter(CleaningFunctions::pidFilter)) .filter(CleaningFunctions::pidFilter))
.orElse(Stream.empty()); .orElse(Stream.empty());
} }
private static boolean shouldFilterPid(KeyValue collectedFrom, StructuredProperty p, boolean mapHandles) { private static boolean shouldFilterPidByAuthority(KeyValue collectedFrom, StructuredProperty p, boolean mapHandles) {
final PidType pType = PidType.tryValueOf(p.getQualifier().getClassid()); final PidType pType = PidType.tryValueOf(p.getQualifier().getClassid());
return (mapHandles && pType.equals(PidType.handle)) || Optional.ofNullable(collectedFrom).isPresent() && return (mapHandles && pType.equals(PidType.handle)) || Optional.ofNullable(collectedFrom).isPresent() &&
Optional Optional
.ofNullable(PID_AUTHORITY.get(pType)) .ofNullable(PID_AUTHORITY.get(pType))
.map(authorities -> { .map(authorities -> {
return authorities.containsKey(collectedFrom.getKey()) return authorities.containsKey(collectedFrom.getKey())
|| authorities.containsValue(collectedFrom.getValue()); || authorities.containsValue(collectedFrom.getValue());
}) })
.orElse(false); .orElse(false);
} }
private static boolean isNotFromDelegatedAuthority(KeyValue collectedFrom, StructuredProperty p) {
final PidType pType = PidType.tryValueOf(p.getQualifier().getClassid());
final Map<String, String> da = DELEGATED_PID_AUTHORITY.get(pType);
if (Objects.isNull(da)) {
return true;
}
if (!da.containsKey(collectedFrom.getKey())) {
return true;
}
return StringUtils.contains(p.getValue(), da.get(collectedFrom.getKey()));
}
/** /**
* @see {@link IdentifierFactory#createIdentifier(OafEntity, boolean)} * @see {@link IdentifierFactory#createIdentifier(OafEntity, boolean)}
*/ */

View File

@ -34,6 +34,9 @@ public class IdentifierFactoryTest {
verifyIdentifier( verifyIdentifier(
"publication_doi4.json", "50|od______2852::38861c44e6052a8d49f59a4c39ba5e66", true); "publication_doi4.json", "50|od______2852::38861c44e6052a8d49f59a4c39ba5e66", true);
verifyIdentifier(
"publication_doi5.json", "50|doi_________::3bef95c0ca26dd55451fc8839ea69d27", true);
verifyIdentifier( verifyIdentifier(
"publication_pmc1.json", "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f", true); "publication_pmc1.json", "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f", true);

View File

@ -3,7 +3,7 @@
"instance": [ "instance": [
{ {
"collectedfrom": { "collectedfrom": {
"key": "10|openaire____::1234", "key": "10|opendoar____::358aee4cc897452c00244351e4d91f69",
"value": "Zenodo" "value": "Zenodo"
}, },
"pid": [ "pid": [

View File

@ -3,7 +3,7 @@
"instance": [ "instance": [
{ {
"collectedfrom": { "collectedfrom": {
"key": "10|openaire____::1234", "key": "10|opendoar____::358aee4cc897452c00244351e4d91f69",
"value": "Zenodo" "value": "Zenodo"
}, },
"pid": [ "pid": [

View File

@ -0,0 +1,37 @@
{
"id": "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f",
"instance": [
{
"collectedfrom": {
"key": "10|opendoar____::358aee4cc897452c00244351e4d91f69",
"value": "Zenodo"
},
"pid": [
{
"qualifier": {"classid": "doi"},
"value": "10.5281/zenodo.5121485"
}
]
},
{
"collectedfrom": {
"key": "10|opendoar____::8b6dd7db9af49e67306feb59a8bdc52c",
"value": "Europe PubMed Central"
},
"pid": [
{
"qualifier": {"classid": "urn"},
"value": "urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"
},
{
"qualifier": {"classid": "scp-number"},
"value": "79953761260"
},
{
"qualifier": {"classid": "pmc"},
"value": "21459329"
}
]
}
]
}

View File

@ -1 +1,17 @@
{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f","pid":[{"qualifier":{"classid":"urn"},"value":"urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"},{"qualifier":{"classid":"scp-number"},"value":"79953761260"},{"qualifier":{"classid":"pmc"},"value":"21459329"}]} {
"id": "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f",
"pid": [
{
"qualifier": {"classid": "urn"},
"value": "urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"
},
{
"qualifier": {"classid": "scp-number"},
"value": "79953761260"
},
{
"qualifier": {"classid": "pmc"},
"value": "21459329"
}
]
}