diff --git a/pom.xml b/pom.xml index 4e353cb..5261849 100644 --- a/pom.xml +++ b/pom.xml @@ -5,7 +5,7 @@ eu.dnetlib.dhp dhp-schemas jar - 3.16.1-SNAPSHOT + 3.17.1-SNAPSHOT diff --git a/src/main/java/eu/dnetlib/dhp/schema/oaf/Instance.java b/src/main/java/eu/dnetlib/dhp/schema/oaf/Instance.java index 6bb2908..f7f7cec 100644 --- a/src/main/java/eu/dnetlib/dhp/schema/oaf/Instance.java +++ b/src/main/java/eu/dnetlib/dhp/schema/oaf/Instance.java @@ -21,7 +21,7 @@ public class Instance implements Serializable { private KeyValue collectedfrom; - private List pid; + private List pid; private List alternateIdentifier; @@ -95,11 +95,11 @@ public class Instance implements Serializable { this.collectedfrom = collectedfrom; } - public List getPid() { + public List getPid() { return pid; } - public void setPid(List pid) { + public void setPid(List pid) { this.pid = pid; } diff --git a/src/main/java/eu/dnetlib/dhp/schema/oaf/OafEntity.java b/src/main/java/eu/dnetlib/dhp/schema/oaf/OafEntity.java index be3bf83..e8081ca 100644 --- a/src/main/java/eu/dnetlib/dhp/schema/oaf/OafEntity.java +++ b/src/main/java/eu/dnetlib/dhp/schema/oaf/OafEntity.java @@ -11,7 +11,7 @@ public abstract class OafEntity extends Oaf implements Serializable { private List originalId; - private List pid; + private List pid; private String dateofcollection; @@ -60,11 +60,11 @@ public abstract class OafEntity extends Oaf implements Serializable { this.originalId = originalId; } - public List getPid() { + public List getPid() { return pid; } - public void setPid(List pid) { + public void setPid(List pid) { this.pid = pid; } diff --git a/src/main/java/eu/dnetlib/dhp/schema/oaf/Pid.java b/src/main/java/eu/dnetlib/dhp/schema/oaf/Pid.java new file mode 100644 index 0000000..dc3cc06 --- /dev/null +++ b/src/main/java/eu/dnetlib/dhp/schema/oaf/Pid.java @@ -0,0 +1,6 @@ +package eu.dnetlib.dhp.schema.oaf; + +import java.io.Serializable; + +public class Pid extends StructuredProperty implements Serializable { +} diff 
--git a/src/main/java/eu/dnetlib/dhp/schema/oaf/Result.java b/src/main/java/eu/dnetlib/dhp/schema/oaf/Result.java index 0027fc4..37aa022 100644 --- a/src/main/java/eu/dnetlib/dhp/schema/oaf/Result.java +++ b/src/main/java/eu/dnetlib/dhp/schema/oaf/Result.java @@ -570,7 +570,21 @@ public class Result extends OafEntity implements Serializable { private static String extractKeyFromPid(final StructuredProperty pid) { if (pid == null) return null; - final StructuredProperty normalizedPid = CleaningFunctions.normalizePidValue(pid); + final Pid normalizedPid = CleaningFunctions.normalizePidValue((Pid) pid); + + return String.format("%s::%s", normalizedPid.getQualifier().getClassid(), normalizedPid.getValue()); + } + + /** + * Builds the lookup key of an alternate identifier after normalising its value. + * + * @param pid the alternate identifier, may be null + * @return the key formatted as classid::value, or null when the input is null + */ + private static String extractKeyFromAltId(final StructuredProperty pid) { + if (pid == null) + return null; + final StructuredProperty normalizedPid = CleaningFunctions.normalizeSPValue(pid); return String.format("%s::%s", normalizedPid.getQualifier().getClassid(), normalizedPid.getValue()); } @@ -595,8 +609,6 @@ public class Result extends OafEntity implements Serializable { * @return the result map */ public static Map toInstanceMap(final List ri) { - - return ri .stream() .filter(i -> i.getPid() != null || i.getAlternateIdentifier() != null) @@ -622,7 +634,7 @@ public class Result extends OafEntity implements Serializable { * @param enrichments the List of enrichment instances having the same pid * @return the list */ - private static List findEnrichmentsByPID(final List pids, final Map enrichments) { + private static List findEnrichmentsByPID(final List pids, final Map enrichments) { if (pids == null || enrichments == null) return null; return pids @@ -633,6 +645,25 @@ public class Result extends OafEntity implements Serializable { .collect(Collectors.toList()); } + /** + * This utility method finds the list of enrichment instances + * that match one or more alternate identifiers in 
the input list + * + * @param pids the list of alternate identifiers + * @param enrichments the map of enrichment instances, looked up by the key built from each alternate identifier + * @return the list of matching enrichment instances, or null when either argument is null + */ + private static List findEnrichmentsByAlternateIdentifier(final List pids, final Map enrichments) { + if (pids == null || enrichments == null) + return null; + return pids + .stream() + .map(Result::extractKeyFromAltId) + .map(enrichments::get) + .filter(Objects::nonNull) + .collect(Collectors.toList()); + } + /** * This method apply enrichment on a single instance * The enrichment consists of replacing values on @@ -712,11 +743,11 @@ public class Result extends OafEntity implements Serializable { toEnrichInstances.forEach(i -> { final List e = findEnrichmentsByPID(i.getPid(), ri); - if (e!= null && e.size()> 0) { + if (e!= null && !e.isEmpty()) { e.forEach(enr -> applyEnrichment(i, enr)); } else { - final List a = findEnrichmentsByPID(i.getAlternateIdentifier(), ri); - if (a!= null && a.size()> 0) { + final List a = findEnrichmentsByAlternateIdentifier(i.getAlternateIdentifier(), ri); + if (a!= null && !a.isEmpty()) { a.forEach(enr -> applyEnrichment(i, enr)); } } diff --git a/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/CleaningFunctions.java b/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/CleaningFunctions.java index 1d12478..5c77c0c 100644 --- a/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/CleaningFunctions.java +++ b/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/CleaningFunctions.java @@ -8,6 +8,7 @@ import java.util.Set; import org.apache.commons.lang3.StringUtils; +import eu.dnetlib.dhp.schema.oaf.Pid; import eu.dnetlib.dhp.schema.oaf.StructuredProperty; public class CleaningFunctions { @@ -29,7 +30,7 @@ public class CleaningFunctions { * @param s the PID whose value will be checked. * @return false if the pid matches the filter criteria, true otherwise. 
*/ - public static boolean pidFilter(StructuredProperty s) { + public static boolean pidFilter(Pid s) { final String pidValue = s.getValue(); if (Objects.isNull(s.getQualifier()) || StringUtils.isBlank(pidValue) || @@ -47,7 +48,7 @@ public class CleaningFunctions { * @param pid the PID whose value will be normalised. * @return the PID containing the normalised value. */ - public static StructuredProperty normalizePidValue(StructuredProperty pid) { + public static Pid normalizePidValue(Pid pid) { pid.setValue( normalizePidValue( pid.getQualifier().getClassid(), @@ -56,6 +57,20 @@ public class CleaningFunctions { return pid; } + /** + * Utility method that normalises the value of a generic structured property on a per-type basis. + * @param pid the property whose value will be normalised. + * @return the same property, containing the normalised value. + */ + public static StructuredProperty normalizeSPValue(StructuredProperty pid) { + pid.setValue( + normalizePidValue( + pid.getQualifier().getClassid(), + pid.getValue())); + + return pid; + } + public static String normalizePidValue(String pidType, String pidValue) { String value = Optional .ofNullable(pidValue) diff --git a/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactory.java b/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactory.java index 32e8e39..a5085bc 100644 --- a/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactory.java +++ b/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactory.java @@ -95,7 +95,7 @@ public class IdentifierFactory implements Serializable { .collect(Collectors.toCollection(HashSet::new)); } - public static List getPids(List pid, KeyValue collectedFrom) { + public static List getPids(List pid, KeyValue collectedFrom) { return pidFromInstance(pid, collectedFrom, true).distinct().collect(Collectors.toList()); } @@ -143,7 +143,7 @@ public class IdentifierFactory implements Serializable { checkArgument(StringUtils.isNoneBlank(entity.getId()), "missing entity identifier"); - final Map> pids = 
extractPids(entity); + final Map> pids = extractPids(entity); return pids .values() @@ -164,7 +164,7 @@ public class IdentifierFactory implements Serializable { .orElseGet(entity::getId); } - private static Map> extractPids(T entity) { + private static Map> extractPids(T entity) { if (entity instanceof Result) { return Optional .ofNullable(((Result) entity).getInstance()) @@ -184,7 +184,7 @@ public class IdentifierFactory implements Serializable { } } - private static Map> mapPids(List instance) { + private static Map> mapPids(List instance) { return instance .stream() .map(i -> pidFromInstance(i.getPid(), i.getCollectedfrom(), false)) @@ -196,7 +196,7 @@ public class IdentifierFactory implements Serializable { Collectors.mapping(p -> p, Collectors.toCollection(HashSet::new)))); } - private static Stream pidFromInstance(List pid, KeyValue collectedFrom, + private static Stream pidFromInstance(List pid, KeyValue collectedFrom, boolean mapHandles) { return Optional .ofNullable(pid) diff --git a/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/PidValueComparator.java b/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/PidValueComparator.java index 0e20835..8bdb008 100644 --- a/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/PidValueComparator.java +++ b/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/PidValueComparator.java @@ -4,12 +4,13 @@ package eu.dnetlib.dhp.schema.oaf.utils; import java.util.Comparator; import java.util.Optional; +import eu.dnetlib.dhp.schema.oaf.Pid; import eu.dnetlib.dhp.schema.oaf.StructuredProperty; -public class PidValueComparator implements Comparator { +public class PidValueComparator implements Comparator { @Override - public int compare(StructuredProperty left, StructuredProperty right) { + public int compare(Pid left, Pid right) { if (left == null && right == null) return 0; @@ -18,15 +19,15 @@ public class PidValueComparator implements Comparator { if (right == null) return -1; - StructuredProperty l = CleaningFunctions.normalizePidValue(left); 
- StructuredProperty r = CleaningFunctions.normalizePidValue(right); + Pid l = CleaningFunctions.normalizePidValue(left); + Pid r = CleaningFunctions.normalizePidValue(right); return Optional .ofNullable(l.getValue()) .map( lv -> Optional .ofNullable(r.getValue()) - .map(rv -> lv.compareTo(rv)) + .map(lv::compareTo) .orElse(-1)) .orElse(1); } diff --git a/src/test/java/eu/dnetlib/dhp/schema/oaf/MergeTest.java b/src/test/java/eu/dnetlib/dhp/schema/oaf/MergeTest.java index 4479ea1..afd4e45 100644 --- a/src/test/java/eu/dnetlib/dhp/schema/oaf/MergeTest.java +++ b/src/test/java/eu/dnetlib/dhp/schema/oaf/MergeTest.java @@ -109,10 +109,20 @@ class MergeTest { final Result currentPub = source.get(i); final Result currentEnrichment = enrichment.get(i); final Instance currentInstance = Objects.requireNonNull(currentPub.getInstance()).get(0); - if (overrideAlternateIdentifier) - currentInstance.setAlternateIdentifier(Objects.requireNonNull(currentEnrichment.getInstance()).get(0).getPid()); - else - currentInstance.setPid(Objects.requireNonNull(currentEnrichment.getInstance()).get(0).getPid()); + final List pid = Objects.requireNonNull(currentEnrichment.getInstance()).get(0).getPid(); + if (overrideAlternateIdentifier) { + currentInstance.setAlternateIdentifier(pid.stream() + .map(p -> { + StructuredProperty sp = new StructuredProperty(); + sp.setValue(p.getValue()); + sp.setQualifier(p.getQualifier()); + sp.setDataInfo(p.getDataInfo()); + return sp; + }) + .collect(Collectors.toList())); + } else { + currentInstance.setPid(pid); + } } }