do not make the identifier prefix depend on the Handle

This commit is contained in:
Claudio Atzori 2021-04-15 16:48:26 +02:00
parent 3d58f95522
commit ba4b4c74d8
3 changed files with 51 additions and 11 deletions

View File

@ -57,7 +57,7 @@ public class IdentifierFactory implements Serializable {
} }
/*
 * Returns the distinct PIDs obtained by passing the given pid list and
 * collectedFrom through pidFromInstance, collected into a List.
 *
 * In this side-by-side view the left half of each line is the previous
 * revision and the right half is the revised one. The revised call passes
 * mapHandles = true; per the revised shouldFilterPid shown further down in
 * this diff, that makes PIDs whose type is PidType.handle satisfy the filter
 * predicate without consulting the authority lookup. The normalisation and
 * pidFilter steps in pidFromInstance still apply afterwards.
 */
public static List<StructuredProperty> getPids(List<StructuredProperty> pid, KeyValue collectedFrom) { public static List<StructuredProperty> getPids(List<StructuredProperty> pid, KeyValue collectedFrom) {
return pidFromInstance(pid, collectedFrom).distinct().collect(Collectors.toList()); return pidFromInstance(pid, collectedFrom, true).distinct().collect(Collectors.toList());
} }
public static <T extends Result> String createDOIBoostIdentifier(T entity) { public static <T extends Result> String createDOIBoostIdentifier(T entity) {
@ -104,7 +104,7 @@ public class IdentifierFactory implements Serializable {
checkArgument(StringUtils.isNoneBlank(entity.getId()), "missing entity identifier"); checkArgument(StringUtils.isNoneBlank(entity.getId()), "missing entity identifier");
final Map<String, List<StructuredProperty>> pids = extractPids(entity); final Map<String, Set<StructuredProperty>> pids = extractPids(entity);
return pids return pids
.values() .values()
@ -125,7 +125,7 @@ public class IdentifierFactory implements Serializable {
.orElseGet(entity::getId); .orElseGet(entity::getId);
} }
private static <T extends OafEntity> Map<String, List<StructuredProperty>> extractPids(T entity) { private static <T extends OafEntity> Map<String, Set<StructuredProperty>> extractPids(T entity) {
if (entity instanceof Result) { if (entity instanceof Result) {
return Optional return Optional
.ofNullable(((Result) entity).getInstance()) .ofNullable(((Result) entity).getInstance())
@ -142,23 +142,23 @@ public class IdentifierFactory implements Serializable {
Collectors Collectors
.groupingBy( .groupingBy(
p -> p.getQualifier().getClassid(), p -> p.getQualifier().getClassid(),
Collectors.mapping(p -> p, Collectors.toList()))); Collectors.mapping(p -> p, Collectors.toCollection(HashSet::new))));
} }
} }
/*
 * Groups the PIDs of all given instances by their qualifier classid.
 *
 * Each instance contributes the stream returned by pidFromInstance for its
 * pid list and collectedfrom; the per-instance streams are flattened into one
 * before grouping.
 *
 * Differences between the previous (left) and revised (right) halves:
 *  - the revised call passes mapHandles = false, so handle PIDs get no
 *    special acceptance in shouldFilterPid and must pass the same checks as
 *    any other PID type;
 *  - each group is collected into a HashSet instead of a List, and the
 *    return type changes to Map<String, Set<StructuredProperty>> accordingly.
 *
 * NOTE(review): the HashSet presumably collapses the same PID reported by
 * several instances into one entry; that depends on StructuredProperty
 * implementing equals/hashCode, which is not visible here. Confirm.
 */
private static Map<String, List<StructuredProperty>> mapPids(List<Instance> instance) { private static Map<String, Set<StructuredProperty>> mapPids(List<Instance> instance) {
return instance return instance
.stream() .stream()
.map(i -> pidFromInstance(i.getPid(), i.getCollectedfrom())) .map(i -> pidFromInstance(i.getPid(), i.getCollectedfrom(), false))
.flatMap(Function.identity()) .flatMap(Function.identity())
.collect( .collect(
Collectors Collectors
.groupingBy( .groupingBy(
p -> p.getQualifier().getClassid(), p -> p.getQualifier().getClassid(),
Collectors.mapping(p -> p, Collectors.toList()))); Collectors.mapping(p -> p, Collectors.toCollection(HashSet::new))));
} }
private static Stream<StructuredProperty> pidFromInstance(List<StructuredProperty> pid, KeyValue collectedFrom) { private static Stream<StructuredProperty> pidFromInstance(List<StructuredProperty> pid, KeyValue collectedFrom, boolean mapHandles) {
return Optional return Optional
.ofNullable(pid) .ofNullable(pid)
.map( .map(
@ -167,16 +167,16 @@ public class IdentifierFactory implements Serializable {
// filter away PIDs provided by a DS that is not considered an authority for the // filter away PIDs provided by a DS that is not considered an authority for the
// given PID Type // given PID Type
.filter(p -> { .filter(p -> {
return shouldFilterPid(collectedFrom, p); return shouldFilterPid(collectedFrom, p, mapHandles);
}) })
.map(CleaningFunctions::normalizePidValue) .map(CleaningFunctions::normalizePidValue)
.filter(CleaningFunctions::pidFilter)) .filter(CleaningFunctions::pidFilter))
.orElse(Stream.empty()); .orElse(Stream.empty());
} }
private static boolean shouldFilterPid(KeyValue collectedFrom, StructuredProperty p) { private static boolean shouldFilterPid(KeyValue collectedFrom, StructuredProperty p, boolean mapHandles) {
final PidType pType = PidType.tryValueOf(p.getQualifier().getClassid()); final PidType pType = PidType.tryValueOf(p.getQualifier().getClassid());
return pType.equals(PidType.handle) || Optional.ofNullable(collectedFrom).isPresent() && return (mapHandles && pType.equals(PidType.handle)) || Optional.ofNullable(collectedFrom).isPresent() &&
Optional Optional
.ofNullable(PID_AUTHORITY.get(pType)) .ofNullable(PID_AUTHORITY.get(pType))
.map(authorities -> { .map(authorities -> {

View File

@ -31,6 +31,9 @@ public class IdentifierFactoryTest {
verifyIdentifier( verifyIdentifier(
"publication_doi3.json", "50|pmc_________::94e4cb08c93f8733b48e2445d04002ac", true); "publication_doi3.json", "50|pmc_________::94e4cb08c93f8733b48e2445d04002ac", true);
verifyIdentifier(
"publication_doi4.json", "50|od______2852::38861c44e6052a8d49f59a4c39ba5e66", true);
verifyIdentifier( verifyIdentifier(
"publication_pmc1.json", "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f", true); "publication_pmc1.json", "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f", true);

View File

@ -0,0 +1,37 @@
{
"id": "50|od______2852::38861c44e6052a8d49f59a4c39ba5e66",
"instance": [
{
"collectedfrom": {
"key": "10|openaire____::1234",
"value": "Zenodo"
},
"pid": [
{
"qualifier": {"classid": "doi"},
"value": "10.1016/j.cmet.2010.03.013"
},
{
"qualifier": {"classid": "handle"},
"value": "11012/83840"
}
]
},
{
"collectedfrom": {
"key": "10|opendoar____::2852",
"value": "Digital library of Brno University of Technology"
},
"pid": [
{
"qualifier": {"classid": "pmc"},
"value": "21459329"
},
{
"qualifier": {"classid": "handle"},
"value": "11012/83840"
}
]
}
]
}