master #59

Closed
claudio.atzori wants to merge 3221 commits from master into stable_ids
2 changed files with 18 additions and 15 deletions
Showing only changes of commit 3256b9c836 - Show all commits

View File

@@ -5,13 +5,13 @@ import java.util.*;
import java.util.function.Function; import java.util.function.Function;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import com.google.common.collect.Sets;
import eu.dnetlib.dhp.schema.oaf.utils.PidBlacklistProvider;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import com.clearspring.analytics.util.Lists; import com.clearspring.analytics.util.Lists;
import com.google.common.collect.Sets;
import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.utils.PidBlacklistProvider;
public class CleaningFunctions { public class CleaningFunctions {
@@ -148,16 +148,18 @@ public class CleaningFunctions {
} }
if (Objects.nonNull(r.getInstance())) { if (Objects.nonNull(r.getInstance())) {
for (Instance i : r.getInstance()) { for (Instance i : r.getInstance()) {
final Set<StructuredProperty> pids = Sets.newHashSet(i.getPid()); final Set<StructuredProperty> pids = Sets.newHashSet(i.getPid());
i.setAlternateIdentifier( i
Optional.ofNullable(i.getAlternateIdentifier()) .setAlternateIdentifier(
.map(altId -> altId.stream() Optional
.filter(p -> !pids.contains(p)) .ofNullable(i.getAlternateIdentifier())
.collect(Collectors.toList())) .map(
.orElse(Lists.newArrayList())); altId -> altId
.stream()
.filter(p -> !pids.contains(p))
.collect(Collectors.toList()))
.orElse(Lists.newArrayList()));
if (Objects.isNull(i.getAccessright()) || StringUtils.isBlank(i.getAccessright().getClassid())) { if (Objects.isNull(i.getAccessright()) || StringUtils.isBlank(i.getAccessright().getClassid())) {
i i
@@ -236,7 +238,8 @@ public class CleaningFunctions {
} }
private static List<StructuredProperty> processPidCleaning(List<StructuredProperty> pids) { private static List<StructuredProperty> processPidCleaning(List<StructuredProperty> pids) {
return pids.stream() return pids
.stream()
.filter(Objects::nonNull) .filter(Objects::nonNull)
.filter(sp -> StringUtils.isNotBlank(StringUtils.trim(sp.getValue()))) .filter(sp -> StringUtils.isNotBlank(StringUtils.trim(sp.getValue())))
.filter(sp -> !PID_BLACKLIST.contains(sp.getValue().trim().toLowerCase())) .filter(sp -> !PID_BLACKLIST.contains(sp.getValue().trim().toLowerCase()))
@@ -286,8 +289,8 @@ public class CleaningFunctions {
public static boolean pidFilter(StructuredProperty s) { public static boolean pidFilter(StructuredProperty s) {
final String pidValue = s.getValue(); final String pidValue = s.getValue();
if (Objects.isNull(s.getQualifier()) || if (Objects.isNull(s.getQualifier()) ||
StringUtils.isBlank(pidValue) || StringUtils.isBlank(pidValue) ||
StringUtils.isBlank(pidValue.replaceAll("(?:\\n|\\r|\\t|\\s)", ""))) { StringUtils.isBlank(pidValue.replaceAll("(?:\\n|\\r|\\t|\\s)", ""))) {
return false; return false;
} }
if (CleaningFunctions.PID_BLACKLIST.contains(pidValue)) { if (CleaningFunctions.PID_BLACKLIST.contains(pidValue)) {

View File

@@ -71,7 +71,7 @@ public class IdentifierFactory implements Serializable {
.stream() .stream()
.filter(Objects::nonNull) .filter(Objects::nonNull)
.filter(s -> s.getQualifier() != null && "doi".equalsIgnoreCase(s.getQualifier().getClassid())) .filter(s -> s.getQualifier() != null && "doi".equalsIgnoreCase(s.getQualifier().getClassid()))
.filter(IdentifierFactory::pidFilter) .filter(CleaningFunctions::pidFilter)
.findAny() .findAny()
.orElse(null); .orElse(null);
} else { } else {
@@ -81,7 +81,7 @@ public class IdentifierFactory implements Serializable {
.stream() .stream()
.filter(i -> i.getPid() != null) .filter(i -> i.getPid() != null)
.flatMap(i -> i.getPid().stream()) .flatMap(i -> i.getPid().stream())
.filter(IdentifierFactory::pidFilter) .filter(CleaningFunctions::pidFilter)
.findAny() .findAny()
.orElse(null); .orElse(null);
} }