From b0203ff5ccd29e834047effd050abf758c8f1720 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Mon, 5 Jul 2021 14:35:26 +0200 Subject: [PATCH] added more cleaning/identifier utilities --- .../schema/oaf/utils/CleaningFunctions.java | 29 ++++++++++++------- .../schema/oaf/utils/IdentifierFactory.java | 20 +++++++------ 2 files changed, 30 insertions(+), 19 deletions(-) diff --git a/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/CleaningFunctions.java b/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/CleaningFunctions.java index 56ee75a..183214c 100644 --- a/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/CleaningFunctions.java +++ b/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/CleaningFunctions.java @@ -46,18 +46,27 @@ public class CleaningFunctions { * @return the PID containing the normalised value. */ public static StructuredProperty normalizePidValue(StructuredProperty pid) { - String value = Optional - .ofNullable(pid.getValue()) - .map(String::trim) - .orElseThrow(() -> new IllegalArgumentException("PID value cannot be empty")); - switch (pid.getQualifier().getClassid()) { + pid.setValue( + normalizePidValue( + pid.getQualifier().getClassid(), + pid.getValue())); - // TODO add cleaning for more PID types as needed - case "doi": - pid.setValue(value.toLowerCase().replaceFirst(DOI_PREFIX_REGEX, DOI_PREFIX)); - break; - } return pid; } + public static String normalizePidValue(String pidType, String pidValue) { + String value = Optional + .ofNullable(pidValue) + .map(String::trim) + .orElseThrow(() -> new IllegalArgumentException("PID value cannot be empty")); + + switch (pidType) { + + // TODO add cleaning for more PID types as needed + case "doi": + return value.toLowerCase().replaceFirst(DOI_PREFIX_REGEX, DOI_PREFIX); + } + return value; + } + } diff --git a/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactory.java b/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactory.java index 43cdbc1..d0baec5 100644 --- a/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactory.java +++ b/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactory.java @@ -164,9 +164,7 @@ public class IdentifierFactory implements Serializable { .stream() // filter away PIDs provided by a DS that is not considered an authority for the // given PID Type - .filter(p -> { - return shouldFilterPid(collectedFrom, p, mapHandles); - }) + .filter(p -> shouldFilterPid(collectedFrom, p, mapHandles)) .map(CleaningFunctions::normalizePidValue) .filter(CleaningFunctions::pidFilter)) .orElse(Stream.empty()); @@ -193,13 +191,17 @@ public class IdentifierFactory implements Serializable { } private static String idFromPid(T entity, StructuredProperty s, boolean md5) { + return idFromPid(ModelSupport.getIdPrefix(entity.getClass()), s.getQualifier().getClassid(), s.getValue(), md5); + } + + public static String idFromPid(String numericPrefix, String pidType, String pidValue, boolean md5) { return new StringBuilder() - .append(ModelSupport.getIdPrefix(entity.getClass())) - .append(ID_PREFIX_SEPARATOR) - .append(createPrefix(s.getQualifier().getClassid())) - .append(ID_SEPARATOR) - .append(md5 ? md5(s.getValue()) : s.getValue()) - .toString(); + .append(numericPrefix) + .append(ID_PREFIX_SEPARATOR) + .append(createPrefix(pidType)) + .append(ID_SEPARATOR) + .append(md5 ? md5(pidValue) : pidValue) + .toString(); } // create the prefix (length = 12)