Compare commits
1 Commits
master
...
pid_cleani
Author | SHA1 | Date |
---|---|---|
Claudio Atzori | 1135f19ec6 |
2
pom.xml
2
pom.xml
|
@ -5,7 +5,7 @@
|
|||
<groupId>eu.dnetlib.dhp</groupId>
|
||||
<artifactId>dhp-schemas</artifactId>
|
||||
<packaging>jar</packaging>
|
||||
<version>3.16.1-SNAPSHOT</version>
|
||||
<version>3.17.1-SNAPSHOT</version>
|
||||
|
||||
<licenses>
|
||||
<license>
|
||||
|
|
|
@ -21,7 +21,7 @@ public class Instance implements Serializable {
|
|||
|
||||
private KeyValue collectedfrom;
|
||||
|
||||
private List<StructuredProperty> pid;
|
||||
private List<Pid> pid;
|
||||
|
||||
private List<StructuredProperty> alternateIdentifier;
|
||||
|
||||
|
@ -95,11 +95,11 @@ public class Instance implements Serializable {
|
|||
this.collectedfrom = collectedfrom;
|
||||
}
|
||||
|
||||
public List<StructuredProperty> getPid() {
|
||||
public List<Pid> getPid() {
|
||||
return pid;
|
||||
}
|
||||
|
||||
public void setPid(List<StructuredProperty> pid) {
|
||||
public void setPid(List<Pid> pid) {
|
||||
this.pid = pid;
|
||||
}
|
||||
|
||||
|
|
|
@ -11,7 +11,7 @@ public abstract class OafEntity extends Oaf implements Serializable {
|
|||
|
||||
private List<String> originalId;
|
||||
|
||||
private List<StructuredProperty> pid;
|
||||
private List<Pid> pid;
|
||||
|
||||
private String dateofcollection;
|
||||
|
||||
|
@ -60,11 +60,11 @@ public abstract class OafEntity extends Oaf implements Serializable {
|
|||
this.originalId = originalId;
|
||||
}
|
||||
|
||||
public List<StructuredProperty> getPid() {
|
||||
public List<Pid> getPid() {
|
||||
return pid;
|
||||
}
|
||||
|
||||
public void setPid(List<StructuredProperty> pid) {
|
||||
public void setPid(List<Pid> pid) {
|
||||
this.pid = pid;
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,6 @@
|
|||
package eu.dnetlib.dhp.schema.oaf;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
public class Pid extends StructuredProperty implements Serializable {
|
||||
}
|
|
@ -570,7 +570,21 @@ public class Result extends OafEntity implements Serializable {
|
|||
private static String extractKeyFromPid(final StructuredProperty pid) {
|
||||
if (pid == null)
|
||||
return null;
|
||||
final StructuredProperty normalizedPid = CleaningFunctions.normalizePidValue(pid);
|
||||
final Pid normalizedPid = CleaningFunctions.normalizePidValue((Pid) pid);
|
||||
|
||||
return String.format("%s::%s", normalizedPid.getQualifier().getClassid(), normalizedPid.getValue());
|
||||
}
|
||||
|
||||
/**
|
||||
* Builds the lookup key for an alternate identifier, formatted as "classid::value" after normalising its value.
|
||||
*
|
||||
* @param pid the alternate identifier (may be null)
|
||||
* @return the "classid::value" key, or null when the given identifier is null
|
||||
*/
|
||||
private static String extractKeyFromAltId(final StructuredProperty pid) {
|
||||
if (pid == null)
|
||||
return null;
|
||||
final StructuredProperty normalizedPid = CleaningFunctions.normalizeSPValue(pid);
|
||||
|
||||
return String.format("%s::%s", normalizedPid.getQualifier().getClassid(), normalizedPid.getValue());
|
||||
}
|
||||
|
@ -595,8 +609,6 @@ public class Result extends OafEntity implements Serializable {
|
|||
* @return the result map
|
||||
*/
|
||||
public static Map<String, Instance> toInstanceMap(final List<Instance> ri) {
|
||||
|
||||
|
||||
return ri
|
||||
.stream()
|
||||
.filter(i -> i.getPid() != null || i.getAlternateIdentifier() != null)
|
||||
|
@ -622,7 +634,7 @@ public class Result extends OafEntity implements Serializable {
|
|||
* @param enrichments the List of enrichment instances having the same pid
|
||||
* @return the list
|
||||
*/
|
||||
private static List<Instance> findEnrichmentsByPID(final List<StructuredProperty> pids, final Map<String,Instance> enrichments) {
|
||||
private static List<Instance> findEnrichmentsByPID(final List<Pid> pids, final Map<String,Instance> enrichments) {
|
||||
if (pids == null || enrichments == null)
|
||||
return null;
|
||||
return pids
|
||||
|
@ -633,6 +645,25 @@ public class Result extends OafEntity implements Serializable {
|
|||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
/**
|
||||
* This utility method finds the list of enrichment instances
|
||||
* that match one or more alternate identifiers in the input list
|
||||
*
|
||||
* @param pids the list of alternate identifiers
|
||||
* @param enrichments the enrichment instances, indexed by their "classid::value" identifier key
|
||||
* @return the matching enrichment instances, or null when either argument is null
|
||||
*/
|
||||
private static List<Instance> findEnrichmentsByAlternateIdentifier(final List<StructuredProperty> pids, final Map<String,Instance> enrichments) {
|
||||
if (pids == null || enrichments == null)
|
||||
return null;
|
||||
return pids
|
||||
.stream()
|
||||
.map(Result::extractKeyFromAltId)
|
||||
.map(enrichments::get)
|
||||
.filter(Objects::nonNull)
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
/**
|
||||
* This method applies enrichment to a single instance
|
||||
* The enrichment consists of replacing values on
|
||||
|
@ -712,11 +743,11 @@ public class Result extends OafEntity implements Serializable {
|
|||
|
||||
toEnrichInstances.forEach(i -> {
|
||||
final List<Instance> e = findEnrichmentsByPID(i.getPid(), ri);
|
||||
if (e!= null && e.size()> 0) {
|
||||
if (e!= null && !e.isEmpty()) {
|
||||
e.forEach(enr -> applyEnrichment(i, enr));
|
||||
} else {
|
||||
final List<Instance> a = findEnrichmentsByPID(i.getAlternateIdentifier(), ri);
|
||||
if (a!= null && a.size()> 0) {
|
||||
final List<Instance> a = findEnrichmentsByAlternateIdentifier(i.getAlternateIdentifier(), ri);
|
||||
if (a!= null && !a.isEmpty()) {
|
||||
a.forEach(enr -> applyEnrichment(i, enr));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -8,6 +8,7 @@ import java.util.Set;
|
|||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
import eu.dnetlib.dhp.schema.oaf.Pid;
|
||||
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
||||
|
||||
public class CleaningFunctions {
|
||||
|
@ -29,7 +30,7 @@ public class CleaningFunctions {
|
|||
* @param s the PID whose value will be checked.
|
||||
* @return false if the pid matches the filter criteria, true otherwise.
|
||||
*/
|
||||
public static boolean pidFilter(StructuredProperty s) {
|
||||
public static boolean pidFilter(Pid s) {
|
||||
final String pidValue = s.getValue();
|
||||
if (Objects.isNull(s.getQualifier()) ||
|
||||
StringUtils.isBlank(pidValue) ||
|
||||
|
@ -47,7 +48,7 @@ public class CleaningFunctions {
|
|||
* @param pid the PID whose value will be normalised.
|
||||
* @return the PID containing the normalised value.
|
||||
*/
|
||||
public static StructuredProperty normalizePidValue(StructuredProperty pid) {
|
||||
public static Pid normalizePidValue(Pid pid) {
|
||||
pid.setValue(
|
||||
normalizePidValue(
|
||||
pid.getQualifier().getClassid(),
|
||||
|
@ -56,6 +57,20 @@ public class CleaningFunctions {
|
|||
return pid;
|
||||
}
|
||||
|
||||
/**
|
||||
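* Utility method that normalises the value of a generic structured property (e.g. an alternate identifier) on a per-type basis, applying the same rules used for PID values.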
|
||||
* @param pid the structured property whose value will be normalised; it is modified in place.
|
||||
* @return the same structured property, now holding the normalised value.
|
||||
*/
|
||||
public static StructuredProperty normalizeSPValue(StructuredProperty pid) {
|
||||
pid.setValue(
|
||||
normalizePidValue(
|
||||
pid.getQualifier().getClassid(),
|
||||
pid.getValue()));
|
||||
|
||||
return pid;
|
||||
}
|
||||
|
||||
public static String normalizePidValue(String pidType, String pidValue) {
|
||||
String value = Optional
|
||||
.ofNullable(pidValue)
|
||||
|
|
|
@ -95,7 +95,7 @@ public class IdentifierFactory implements Serializable {
|
|||
.collect(Collectors.toCollection(HashSet::new));
|
||||
}
|
||||
|
||||
public static List<StructuredProperty> getPids(List<StructuredProperty> pid, KeyValue collectedFrom) {
|
||||
public static List<StructuredProperty> getPids(List<Pid> pid, KeyValue collectedFrom) {
|
||||
return pidFromInstance(pid, collectedFrom, true).distinct().collect(Collectors.toList());
|
||||
}
|
||||
|
||||
|
@ -143,7 +143,7 @@ public class IdentifierFactory implements Serializable {
|
|||
|
||||
checkArgument(StringUtils.isNoneBlank(entity.getId()), "missing entity identifier");
|
||||
|
||||
final Map<String, Set<StructuredProperty>> pids = extractPids(entity);
|
||||
final Map<String, Set<Pid>> pids = extractPids(entity);
|
||||
|
||||
return pids
|
||||
.values()
|
||||
|
@ -164,7 +164,7 @@ public class IdentifierFactory implements Serializable {
|
|||
.orElseGet(entity::getId);
|
||||
}
|
||||
|
||||
private static <T extends OafEntity> Map<String, Set<StructuredProperty>> extractPids(T entity) {
|
||||
private static <T extends OafEntity> Map<String, Set<Pid>> extractPids(T entity) {
|
||||
if (entity instanceof Result) {
|
||||
return Optional
|
||||
.ofNullable(((Result) entity).getInstance())
|
||||
|
@ -184,7 +184,7 @@ public class IdentifierFactory implements Serializable {
|
|||
}
|
||||
}
|
||||
|
||||
private static Map<String, Set<StructuredProperty>> mapPids(List<Instance> instance) {
|
||||
private static Map<String, Set<Pid>> mapPids(List<Instance> instance) {
|
||||
return instance
|
||||
.stream()
|
||||
.map(i -> pidFromInstance(i.getPid(), i.getCollectedfrom(), false))
|
||||
|
@ -196,7 +196,7 @@ public class IdentifierFactory implements Serializable {
|
|||
Collectors.mapping(p -> p, Collectors.toCollection(HashSet::new))));
|
||||
}
|
||||
|
||||
private static Stream<StructuredProperty> pidFromInstance(List<StructuredProperty> pid, KeyValue collectedFrom,
|
||||
private static Stream<Pid> pidFromInstance(List<Pid> pid, KeyValue collectedFrom,
|
||||
boolean mapHandles) {
|
||||
return Optional
|
||||
.ofNullable(pid)
|
||||
|
|
|
@ -4,12 +4,13 @@ package eu.dnetlib.dhp.schema.oaf.utils;
|
|||
import java.util.Comparator;
|
||||
import java.util.Optional;
|
||||
|
||||
import eu.dnetlib.dhp.schema.oaf.Pid;
|
||||
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
||||
|
||||
public class PidValueComparator implements Comparator<StructuredProperty> {
|
||||
public class PidValueComparator implements Comparator<Pid> {
|
||||
|
||||
@Override
|
||||
public int compare(StructuredProperty left, StructuredProperty right) {
|
||||
public int compare(Pid left, Pid right) {
|
||||
|
||||
if (left == null && right == null)
|
||||
return 0;
|
||||
|
@ -18,15 +19,15 @@ public class PidValueComparator implements Comparator<StructuredProperty> {
|
|||
if (right == null)
|
||||
return -1;
|
||||
|
||||
StructuredProperty l = CleaningFunctions.normalizePidValue(left);
|
||||
StructuredProperty r = CleaningFunctions.normalizePidValue(right);
|
||||
Pid l = CleaningFunctions.normalizePidValue(left);
|
||||
Pid r = CleaningFunctions.normalizePidValue(right);
|
||||
|
||||
return Optional
|
||||
.ofNullable(l.getValue())
|
||||
.map(
|
||||
lv -> Optional
|
||||
.ofNullable(r.getValue())
|
||||
.map(rv -> lv.compareTo(rv))
|
||||
.map(lv::compareTo)
|
||||
.orElse(-1))
|
||||
.orElse(1);
|
||||
}
|
||||
|
|
|
@ -109,10 +109,20 @@ class MergeTest {
|
|||
final Result currentPub = source.get(i);
|
||||
final Result currentEnrichment = enrichment.get(i);
|
||||
final Instance currentInstance = Objects.requireNonNull(currentPub.getInstance()).get(0);
|
||||
if (overrideAlternateIdentifier)
|
||||
currentInstance.setAlternateIdentifier(Objects.requireNonNull(currentEnrichment.getInstance()).get(0).getPid());
|
||||
else
|
||||
currentInstance.setPid(Objects.requireNonNull(currentEnrichment.getInstance()).get(0).getPid());
|
||||
final List<Pid> pid = Objects.requireNonNull(currentEnrichment.getInstance()).get(0).getPid();
|
||||
if (overrideAlternateIdentifier) {
|
||||
currentInstance.setAlternateIdentifier(pid.stream()
|
||||
.map(p -> {
|
||||
StructuredProperty sp = new StructuredProperty();
|
||||
sp.setValue(p.getValue());
|
||||
sp.setQualifier(p.getQualifier());
|
||||
sp.setDataInfo(p.getDataInfo());
|
||||
return sp;
|
||||
})
|
||||
.collect(Collectors.toList()));
|
||||
} else {
|
||||
currentInstance.setPid(pid);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue