master #59
|
@ -5,13 +5,13 @@ import java.util.*;
|
||||||
import java.util.function.Function;
|
import java.util.function.Function;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
import com.google.common.collect.Sets;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.utils.PidBlacklistProvider;
|
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
|
||||||
import com.clearspring.analytics.util.Lists;
|
import com.clearspring.analytics.util.Lists;
|
||||||
|
import com.google.common.collect.Sets;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.utils.PidBlacklistProvider;
|
||||||
|
|
||||||
public class CleaningFunctions {
|
public class CleaningFunctions {
|
||||||
|
|
||||||
|
@ -148,16 +148,18 @@ public class CleaningFunctions {
|
||||||
}
|
}
|
||||||
if (Objects.nonNull(r.getInstance())) {
|
if (Objects.nonNull(r.getInstance())) {
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
for (Instance i : r.getInstance()) {
|
for (Instance i : r.getInstance()) {
|
||||||
final Set<StructuredProperty> pids = Sets.newHashSet(i.getPid());
|
final Set<StructuredProperty> pids = Sets.newHashSet(i.getPid());
|
||||||
i.setAlternateIdentifier(
|
i
|
||||||
Optional.ofNullable(i.getAlternateIdentifier())
|
.setAlternateIdentifier(
|
||||||
.map(altId -> altId.stream()
|
Optional
|
||||||
.filter(p -> !pids.contains(p))
|
.ofNullable(i.getAlternateIdentifier())
|
||||||
.collect(Collectors.toList()))
|
.map(
|
||||||
.orElse(Lists.newArrayList()));
|
altId -> altId
|
||||||
|
.stream()
|
||||||
|
.filter(p -> !pids.contains(p))
|
||||||
|
.collect(Collectors.toList()))
|
||||||
|
.orElse(Lists.newArrayList()));
|
||||||
|
|
||||||
if (Objects.isNull(i.getAccessright()) || StringUtils.isBlank(i.getAccessright().getClassid())) {
|
if (Objects.isNull(i.getAccessright()) || StringUtils.isBlank(i.getAccessright().getClassid())) {
|
||||||
i
|
i
|
||||||
|
@ -236,7 +238,8 @@ public class CleaningFunctions {
|
||||||
}
|
}
|
||||||
|
|
||||||
private static List<StructuredProperty> processPidCleaning(List<StructuredProperty> pids) {
|
private static List<StructuredProperty> processPidCleaning(List<StructuredProperty> pids) {
|
||||||
return pids.stream()
|
return pids
|
||||||
|
.stream()
|
||||||
.filter(Objects::nonNull)
|
.filter(Objects::nonNull)
|
||||||
.filter(sp -> StringUtils.isNotBlank(StringUtils.trim(sp.getValue())))
|
.filter(sp -> StringUtils.isNotBlank(StringUtils.trim(sp.getValue())))
|
||||||
.filter(sp -> !PID_BLACKLIST.contains(sp.getValue().trim().toLowerCase()))
|
.filter(sp -> !PID_BLACKLIST.contains(sp.getValue().trim().toLowerCase()))
|
||||||
|
@ -286,8 +289,8 @@ public class CleaningFunctions {
|
||||||
public static boolean pidFilter(StructuredProperty s) {
|
public static boolean pidFilter(StructuredProperty s) {
|
||||||
final String pidValue = s.getValue();
|
final String pidValue = s.getValue();
|
||||||
if (Objects.isNull(s.getQualifier()) ||
|
if (Objects.isNull(s.getQualifier()) ||
|
||||||
StringUtils.isBlank(pidValue) ||
|
StringUtils.isBlank(pidValue) ||
|
||||||
StringUtils.isBlank(pidValue.replaceAll("(?:\\n|\\r|\\t|\\s)", ""))) {
|
StringUtils.isBlank(pidValue.replaceAll("(?:\\n|\\r|\\t|\\s)", ""))) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (CleaningFunctions.PID_BLACKLIST.contains(pidValue)) {
|
if (CleaningFunctions.PID_BLACKLIST.contains(pidValue)) {
|
||||||
|
|
|
@ -71,7 +71,7 @@ public class IdentifierFactory implements Serializable {
|
||||||
.stream()
|
.stream()
|
||||||
.filter(Objects::nonNull)
|
.filter(Objects::nonNull)
|
||||||
.filter(s -> s.getQualifier() != null && "doi".equalsIgnoreCase(s.getQualifier().getClassid()))
|
.filter(s -> s.getQualifier() != null && "doi".equalsIgnoreCase(s.getQualifier().getClassid()))
|
||||||
.filter(IdentifierFactory::pidFilter)
|
.filter(CleaningFunctions::pidFilter)
|
||||||
.findAny()
|
.findAny()
|
||||||
.orElse(null);
|
.orElse(null);
|
||||||
} else {
|
} else {
|
||||||
|
@ -81,7 +81,7 @@ public class IdentifierFactory implements Serializable {
|
||||||
.stream()
|
.stream()
|
||||||
.filter(i -> i.getPid() != null)
|
.filter(i -> i.getPid() != null)
|
||||||
.flatMap(i -> i.getPid().stream())
|
.flatMap(i -> i.getPid().stream())
|
||||||
.filter(IdentifierFactory::pidFilter)
|
.filter(CleaningFunctions::pidFilter)
|
||||||
.findAny()
|
.findAny()
|
||||||
.orElse(null);
|
.orElse(null);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue