Compare commits
1 Commits
master
...
pid_cleani
Author | SHA1 | Date |
---|---|---|
Claudio Atzori | 1135f19ec6 |
2
pom.xml
2
pom.xml
|
@ -5,7 +5,7 @@
|
||||||
<groupId>eu.dnetlib.dhp</groupId>
|
<groupId>eu.dnetlib.dhp</groupId>
|
||||||
<artifactId>dhp-schemas</artifactId>
|
<artifactId>dhp-schemas</artifactId>
|
||||||
<packaging>jar</packaging>
|
<packaging>jar</packaging>
|
||||||
<version>3.16.1-SNAPSHOT</version>
|
<version>3.17.1-SNAPSHOT</version>
|
||||||
|
|
||||||
<licenses>
|
<licenses>
|
||||||
<license>
|
<license>
|
||||||
|
|
|
@ -21,7 +21,7 @@ public class Instance implements Serializable {
|
||||||
|
|
||||||
private KeyValue collectedfrom;
|
private KeyValue collectedfrom;
|
||||||
|
|
||||||
private List<StructuredProperty> pid;
|
private List<Pid> pid;
|
||||||
|
|
||||||
private List<StructuredProperty> alternateIdentifier;
|
private List<StructuredProperty> alternateIdentifier;
|
||||||
|
|
||||||
|
@ -95,11 +95,11 @@ public class Instance implements Serializable {
|
||||||
this.collectedfrom = collectedfrom;
|
this.collectedfrom = collectedfrom;
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<StructuredProperty> getPid() {
|
public List<Pid> getPid() {
|
||||||
return pid;
|
return pid;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setPid(List<StructuredProperty> pid) {
|
public void setPid(List<Pid> pid) {
|
||||||
this.pid = pid;
|
this.pid = pid;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -11,7 +11,7 @@ public abstract class OafEntity extends Oaf implements Serializable {
|
||||||
|
|
||||||
private List<String> originalId;
|
private List<String> originalId;
|
||||||
|
|
||||||
private List<StructuredProperty> pid;
|
private List<Pid> pid;
|
||||||
|
|
||||||
private String dateofcollection;
|
private String dateofcollection;
|
||||||
|
|
||||||
|
@ -60,11 +60,11 @@ public abstract class OafEntity extends Oaf implements Serializable {
|
||||||
this.originalId = originalId;
|
this.originalId = originalId;
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<StructuredProperty> getPid() {
|
public List<Pid> getPid() {
|
||||||
return pid;
|
return pid;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setPid(List<StructuredProperty> pid) {
|
public void setPid(List<Pid> pid) {
|
||||||
this.pid = pid;
|
this.pid = pid;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,6 @@
|
||||||
|
package eu.dnetlib.dhp.schema.oaf;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
|
||||||
|
public class Pid extends StructuredProperty implements Serializable {
|
||||||
|
}
|
|
@ -570,7 +570,21 @@ public class Result extends OafEntity implements Serializable {
|
||||||
private static String extractKeyFromPid(final StructuredProperty pid) {
|
private static String extractKeyFromPid(final StructuredProperty pid) {
|
||||||
if (pid == null)
|
if (pid == null)
|
||||||
return null;
|
return null;
|
||||||
final StructuredProperty normalizedPid = CleaningFunctions.normalizePidValue(pid);
|
final Pid normalizedPid = CleaningFunctions.normalizePidValue((Pid) pid);
|
||||||
|
|
||||||
|
return String.format("%s::%s", normalizedPid.getQualifier().getClassid(), normalizedPid.getValue());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Builds the lookup key ({@code classid::value}) for an alternate identifier, after normalizing its value.
|
||||||
|
*
|
||||||
|
* @param pid the alternate identifier; may be null
|
||||||
|
* @return the lookup key, or null when the given identifier is null
|
||||||
|
*/
|
||||||
|
private static String extractKeyFromAltId(final StructuredProperty pid) {
|
||||||
|
if (pid == null)
|
||||||
|
return null;
|
||||||
|
final StructuredProperty normalizedPid = CleaningFunctions.normalizeSPValue(pid);
|
||||||
|
|
||||||
return String.format("%s::%s", normalizedPid.getQualifier().getClassid(), normalizedPid.getValue());
|
return String.format("%s::%s", normalizedPid.getQualifier().getClassid(), normalizedPid.getValue());
|
||||||
}
|
}
|
||||||
|
@ -595,8 +609,6 @@ public class Result extends OafEntity implements Serializable {
|
||||||
* @return the result map
|
* @return the result map
|
||||||
*/
|
*/
|
||||||
public static Map<String, Instance> toInstanceMap(final List<Instance> ri) {
|
public static Map<String, Instance> toInstanceMap(final List<Instance> ri) {
|
||||||
|
|
||||||
|
|
||||||
return ri
|
return ri
|
||||||
.stream()
|
.stream()
|
||||||
.filter(i -> i.getPid() != null || i.getAlternateIdentifier() != null)
|
.filter(i -> i.getPid() != null || i.getAlternateIdentifier() != null)
|
||||||
|
@ -622,7 +634,7 @@ public class Result extends OafEntity implements Serializable {
|
||||||
* @param enrichments the List of enrichment instances having the same pid
|
* @param enrichments the List of enrichment instances having the same pid
|
||||||
* @return the list
|
* @return the list
|
||||||
*/
|
*/
|
||||||
private static List<Instance> findEnrichmentsByPID(final List<StructuredProperty> pids, final Map<String,Instance> enrichments) {
|
private static List<Instance> findEnrichmentsByPID(final List<Pid> pids, final Map<String,Instance> enrichments) {
|
||||||
if (pids == null || enrichments == null)
|
if (pids == null || enrichments == null)
|
||||||
return null;
|
return null;
|
||||||
return pids
|
return pids
|
||||||
|
@ -633,6 +645,25 @@ public class Result extends OafEntity implements Serializable {
|
||||||
.collect(Collectors.toList());
|
.collect(Collectors.toList());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This utility method finds the list of enrichment instances
|
||||||
|
* that match one or more alternate identifiers in the input list
|
||||||
|
*
|
||||||
|
* @param pids the list of alternate identifiers
|
||||||
|
* @param enrichments the map of enrichment instances, looked up by the key extracted from each alternate identifier
|
||||||
|
* @return the list
|
||||||
|
*/
|
||||||
|
private static List<Instance> findEnrichmentsByAlternateIdentifier(final List<StructuredProperty> pids, final Map<String,Instance> enrichments) {
|
||||||
|
if (pids == null || enrichments == null)
|
||||||
|
return null;
|
||||||
|
return pids
|
||||||
|
.stream()
|
||||||
|
.map(Result::extractKeyFromAltId)
|
||||||
|
.map(enrichments::get)
|
||||||
|
.filter(Objects::nonNull)
|
||||||
|
.collect(Collectors.toList());
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This method applies the enrichment to a single instance
|
* This method applies the enrichment to a single instance
|
||||||
* The enrichment consists of replacing values on
|
* The enrichment consists of replacing values on
|
||||||
|
@ -712,11 +743,11 @@ public class Result extends OafEntity implements Serializable {
|
||||||
|
|
||||||
toEnrichInstances.forEach(i -> {
|
toEnrichInstances.forEach(i -> {
|
||||||
final List<Instance> e = findEnrichmentsByPID(i.getPid(), ri);
|
final List<Instance> e = findEnrichmentsByPID(i.getPid(), ri);
|
||||||
if (e!= null && e.size()> 0) {
|
if (e!= null && !e.isEmpty()) {
|
||||||
e.forEach(enr -> applyEnrichment(i, enr));
|
e.forEach(enr -> applyEnrichment(i, enr));
|
||||||
} else {
|
} else {
|
||||||
final List<Instance> a = findEnrichmentsByPID(i.getAlternateIdentifier(), ri);
|
final List<Instance> a = findEnrichmentsByAlternateIdentifier(i.getAlternateIdentifier(), ri);
|
||||||
if (a!= null && a.size()> 0) {
|
if (a!= null && !a.isEmpty()) {
|
||||||
a.forEach(enr -> applyEnrichment(i, enr));
|
a.forEach(enr -> applyEnrichment(i, enr));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -8,6 +8,7 @@ import java.util.Set;
|
||||||
|
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Pid;
|
||||||
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
||||||
|
|
||||||
public class CleaningFunctions {
|
public class CleaningFunctions {
|
||||||
|
@ -29,7 +30,7 @@ public class CleaningFunctions {
|
||||||
* @param s the PID whose value will be checked.
|
* @param s the PID whose value will be checked.
|
||||||
* @return false if the pid matches the filter criteria, true otherwise.
|
* @return false if the pid matches the filter criteria, true otherwise.
|
||||||
*/
|
*/
|
||||||
public static boolean pidFilter(StructuredProperty s) {
|
public static boolean pidFilter(Pid s) {
|
||||||
final String pidValue = s.getValue();
|
final String pidValue = s.getValue();
|
||||||
if (Objects.isNull(s.getQualifier()) ||
|
if (Objects.isNull(s.getQualifier()) ||
|
||||||
StringUtils.isBlank(pidValue) ||
|
StringUtils.isBlank(pidValue) ||
|
||||||
|
@ -47,7 +48,7 @@ public class CleaningFunctions {
|
||||||
* @param pid the PID whose value will be normalised.
|
* @param pid the PID whose value will be normalised.
|
||||||
* @return the PID containing the normalised value.
|
* @return the PID containing the normalised value.
|
||||||
*/
|
*/
|
||||||
public static StructuredProperty normalizePidValue(StructuredProperty pid) {
|
public static Pid normalizePidValue(Pid pid) {
|
||||||
pid.setValue(
|
pid.setValue(
|
||||||
normalizePidValue(
|
normalizePidValue(
|
||||||
pid.getQualifier().getClassid(),
|
pid.getQualifier().getClassid(),
|
||||||
|
@ -56,6 +57,20 @@ public class CleaningFunctions {
|
||||||
return pid;
|
return pid;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Utility method that normalises PID values on a per-type basis.
|
||||||
|
* @param pid the structured property (e.g. an alternate identifier) whose value will be normalised.
|
||||||
|
* @return the PID containing the normalised value.
|
||||||
|
*/
|
||||||
|
public static StructuredProperty normalizeSPValue(StructuredProperty pid) {
|
||||||
|
pid.setValue(
|
||||||
|
normalizePidValue(
|
||||||
|
pid.getQualifier().getClassid(),
|
||||||
|
pid.getValue()));
|
||||||
|
|
||||||
|
return pid;
|
||||||
|
}
|
||||||
|
|
||||||
public static String normalizePidValue(String pidType, String pidValue) {
|
public static String normalizePidValue(String pidType, String pidValue) {
|
||||||
String value = Optional
|
String value = Optional
|
||||||
.ofNullable(pidValue)
|
.ofNullable(pidValue)
|
||||||
|
|
|
@ -95,7 +95,7 @@ public class IdentifierFactory implements Serializable {
|
||||||
.collect(Collectors.toCollection(HashSet::new));
|
.collect(Collectors.toCollection(HashSet::new));
|
||||||
}
|
}
|
||||||
|
|
||||||
public static List<StructuredProperty> getPids(List<StructuredProperty> pid, KeyValue collectedFrom) {
|
public static List<StructuredProperty> getPids(List<Pid> pid, KeyValue collectedFrom) {
|
||||||
return pidFromInstance(pid, collectedFrom, true).distinct().collect(Collectors.toList());
|
return pidFromInstance(pid, collectedFrom, true).distinct().collect(Collectors.toList());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -143,7 +143,7 @@ public class IdentifierFactory implements Serializable {
|
||||||
|
|
||||||
checkArgument(StringUtils.isNoneBlank(entity.getId()), "missing entity identifier");
|
checkArgument(StringUtils.isNoneBlank(entity.getId()), "missing entity identifier");
|
||||||
|
|
||||||
final Map<String, Set<StructuredProperty>> pids = extractPids(entity);
|
final Map<String, Set<Pid>> pids = extractPids(entity);
|
||||||
|
|
||||||
return pids
|
return pids
|
||||||
.values()
|
.values()
|
||||||
|
@ -164,7 +164,7 @@ public class IdentifierFactory implements Serializable {
|
||||||
.orElseGet(entity::getId);
|
.orElseGet(entity::getId);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static <T extends OafEntity> Map<String, Set<StructuredProperty>> extractPids(T entity) {
|
private static <T extends OafEntity> Map<String, Set<Pid>> extractPids(T entity) {
|
||||||
if (entity instanceof Result) {
|
if (entity instanceof Result) {
|
||||||
return Optional
|
return Optional
|
||||||
.ofNullable(((Result) entity).getInstance())
|
.ofNullable(((Result) entity).getInstance())
|
||||||
|
@ -184,7 +184,7 @@ public class IdentifierFactory implements Serializable {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static Map<String, Set<StructuredProperty>> mapPids(List<Instance> instance) {
|
private static Map<String, Set<Pid>> mapPids(List<Instance> instance) {
|
||||||
return instance
|
return instance
|
||||||
.stream()
|
.stream()
|
||||||
.map(i -> pidFromInstance(i.getPid(), i.getCollectedfrom(), false))
|
.map(i -> pidFromInstance(i.getPid(), i.getCollectedfrom(), false))
|
||||||
|
@ -196,7 +196,7 @@ public class IdentifierFactory implements Serializable {
|
||||||
Collectors.mapping(p -> p, Collectors.toCollection(HashSet::new))));
|
Collectors.mapping(p -> p, Collectors.toCollection(HashSet::new))));
|
||||||
}
|
}
|
||||||
|
|
||||||
private static Stream<StructuredProperty> pidFromInstance(List<StructuredProperty> pid, KeyValue collectedFrom,
|
private static Stream<Pid> pidFromInstance(List<Pid> pid, KeyValue collectedFrom,
|
||||||
boolean mapHandles) {
|
boolean mapHandles) {
|
||||||
return Optional
|
return Optional
|
||||||
.ofNullable(pid)
|
.ofNullable(pid)
|
||||||
|
|
|
@ -4,12 +4,13 @@ package eu.dnetlib.dhp.schema.oaf.utils;
|
||||||
import java.util.Comparator;
|
import java.util.Comparator;
|
||||||
import java.util.Optional;
|
import java.util.Optional;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Pid;
|
||||||
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
||||||
|
|
||||||
public class PidValueComparator implements Comparator<StructuredProperty> {
|
public class PidValueComparator implements Comparator<Pid> {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int compare(StructuredProperty left, StructuredProperty right) {
|
public int compare(Pid left, Pid right) {
|
||||||
|
|
||||||
if (left == null && right == null)
|
if (left == null && right == null)
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -18,15 +19,15 @@ public class PidValueComparator implements Comparator<StructuredProperty> {
|
||||||
if (right == null)
|
if (right == null)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
StructuredProperty l = CleaningFunctions.normalizePidValue(left);
|
Pid l = CleaningFunctions.normalizePidValue(left);
|
||||||
StructuredProperty r = CleaningFunctions.normalizePidValue(right);
|
Pid r = CleaningFunctions.normalizePidValue(right);
|
||||||
|
|
||||||
return Optional
|
return Optional
|
||||||
.ofNullable(l.getValue())
|
.ofNullable(l.getValue())
|
||||||
.map(
|
.map(
|
||||||
lv -> Optional
|
lv -> Optional
|
||||||
.ofNullable(r.getValue())
|
.ofNullable(r.getValue())
|
||||||
.map(rv -> lv.compareTo(rv))
|
.map(lv::compareTo)
|
||||||
.orElse(-1))
|
.orElse(-1))
|
||||||
.orElse(1);
|
.orElse(1);
|
||||||
}
|
}
|
||||||
|
|
|
@ -109,10 +109,20 @@ class MergeTest {
|
||||||
final Result currentPub = source.get(i);
|
final Result currentPub = source.get(i);
|
||||||
final Result currentEnrichment = enrichment.get(i);
|
final Result currentEnrichment = enrichment.get(i);
|
||||||
final Instance currentInstance = Objects.requireNonNull(currentPub.getInstance()).get(0);
|
final Instance currentInstance = Objects.requireNonNull(currentPub.getInstance()).get(0);
|
||||||
if (overrideAlternateIdentifier)
|
final List<Pid> pid = Objects.requireNonNull(currentEnrichment.getInstance()).get(0).getPid();
|
||||||
currentInstance.setAlternateIdentifier(Objects.requireNonNull(currentEnrichment.getInstance()).get(0).getPid());
|
if (overrideAlternateIdentifier) {
|
||||||
else
|
currentInstance.setAlternateIdentifier(pid.stream()
|
||||||
currentInstance.setPid(Objects.requireNonNull(currentEnrichment.getInstance()).get(0).getPid());
|
.map(p -> {
|
||||||
|
StructuredProperty sp = new StructuredProperty();
|
||||||
|
sp.setValue(p.getValue());
|
||||||
|
sp.setQualifier(p.getQualifier());
|
||||||
|
sp.setDataInfo(p.getDataInfo());
|
||||||
|
return sp;
|
||||||
|
})
|
||||||
|
.collect(Collectors.toList()));
|
||||||
|
} else {
|
||||||
|
currentInstance.setPid(pid);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue