forked from D-Net/dnet-hadoop
implemented default merge procedure applied to result.instance
This commit is contained in:
parent
c8683eb13c
commit
9fc70a9451
|
@ -1,14 +1,15 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.schema.oaf.utils;
|
package eu.dnetlib.dhp.schema.oaf.utils;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
|
||||||
import org.apache.commons.lang3.StringUtils;
|
|
||||||
|
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
import java.util.Objects;
|
import java.util.Objects;
|
||||||
import java.util.Optional;
|
import java.util.Optional;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
||||||
|
|
||||||
public class CleaningFunctions {
|
public class CleaningFunctions {
|
||||||
|
|
||||||
public static final String DOI_PREFIX_REGEX = "(^10\\.|\\/10\\.)";
|
public static final String DOI_PREFIX_REGEX = "(^10\\.|\\/10\\.)";
|
||||||
|
@ -21,7 +22,8 @@ public class CleaningFunctions {
|
||||||
PID_BLACKLIST.add("na");
|
PID_BLACKLIST.add("na");
|
||||||
}
|
}
|
||||||
|
|
||||||
public CleaningFunctions() {}
|
public CleaningFunctions() {
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Utility method that filter PID values on a per-type basis.
|
* Utility method that filter PID values on a per-type basis.
|
||||||
|
@ -47,7 +49,8 @@ public class CleaningFunctions {
|
||||||
* @return the PID containing the normalised value.
|
* @return the PID containing the normalised value.
|
||||||
*/
|
*/
|
||||||
public static StructuredProperty normalizePidValue(StructuredProperty pid) {
|
public static StructuredProperty normalizePidValue(StructuredProperty pid) {
|
||||||
pid.setValue(
|
pid
|
||||||
|
.setValue(
|
||||||
normalizePidValue(
|
normalizePidValue(
|
||||||
pid.getQualifier().getClassid(),
|
pid.getQualifier().getClassid(),
|
||||||
pid.getValue()));
|
pid.getValue()));
|
||||||
|
@ -57,9 +60,9 @@ public class CleaningFunctions {
|
||||||
|
|
||||||
public static String normalizePidValue(String pidType, String pidValue) {
|
public static String normalizePidValue(String pidType, String pidValue) {
|
||||||
String value = Optional
|
String value = Optional
|
||||||
.ofNullable(pidValue)
|
.ofNullable(pidValue)
|
||||||
.map(String::trim)
|
.map(String::trim)
|
||||||
.orElseThrow(() -> new IllegalArgumentException("PID value cannot be empty"));
|
.orElseThrow(() -> new IllegalArgumentException("PID value cannot be empty"));
|
||||||
|
|
||||||
switch (pidType) {
|
switch (pidType) {
|
||||||
|
|
||||||
|
|
|
@ -1,12 +1,8 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.schema.oaf.utils;
|
package eu.dnetlib.dhp.schema.oaf.utils;
|
||||||
|
|
||||||
import com.google.common.collect.HashBiMap;
|
import static com.google.common.base.Preconditions.checkArgument;
|
||||||
import com.google.common.collect.Maps;
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
|
||||||
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.*;
|
|
||||||
import org.apache.commons.codec.binary.Hex;
|
|
||||||
import org.apache.commons.lang3.StringUtils;
|
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.nio.charset.StandardCharsets;
|
import java.nio.charset.StandardCharsets;
|
||||||
|
@ -16,8 +12,14 @@ import java.util.function.Function;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
import java.util.stream.Stream;
|
import java.util.stream.Stream;
|
||||||
|
|
||||||
import static com.google.common.base.Preconditions.checkArgument;
|
import org.apache.commons.codec.binary.Hex;
|
||||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
|
||||||
|
import com.google.common.collect.HashBiMap;
|
||||||
|
import com.google.common.collect.Maps;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.*;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Factory class for OpenAIRE identifiers in the Graph
|
* Factory class for OpenAIRE identifiers in the Graph
|
||||||
|
@ -87,10 +89,11 @@ public class IdentifierFactory implements Serializable {
|
||||||
}
|
}
|
||||||
|
|
||||||
public static Set<String> delegatedAuthorityDatasourceIds() {
|
public static Set<String> delegatedAuthorityDatasourceIds() {
|
||||||
return DELEGATED_PID_AUTHORITY.values()
|
return DELEGATED_PID_AUTHORITY
|
||||||
.stream()
|
.values()
|
||||||
.flatMap(m -> m.keySet().stream())
|
.stream()
|
||||||
.collect(Collectors.toCollection(HashSet::new));
|
.flatMap(m -> m.keySet().stream())
|
||||||
|
.collect(Collectors.toCollection(HashSet::new));
|
||||||
}
|
}
|
||||||
|
|
||||||
public static List<StructuredProperty> getPids(List<StructuredProperty> pid, KeyValue collectedFrom) {
|
public static List<StructuredProperty> getPids(List<StructuredProperty> pid, KeyValue collectedFrom) {
|
||||||
|
@ -210,7 +213,6 @@ public class IdentifierFactory implements Serializable {
|
||||||
.orElse(Stream.empty());
|
.orElse(Stream.empty());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private static boolean shouldFilterPidByCriteria(KeyValue collectedFrom, StructuredProperty p, boolean mapHandles) {
|
private static boolean shouldFilterPidByCriteria(KeyValue collectedFrom, StructuredProperty p, boolean mapHandles) {
|
||||||
final PidType pType = PidType.tryValueOf(p.getQualifier().getClassid());
|
final PidType pType = PidType.tryValueOf(p.getQualifier().getClassid());
|
||||||
|
|
||||||
|
@ -219,16 +221,18 @@ public class IdentifierFactory implements Serializable {
|
||||||
}
|
}
|
||||||
|
|
||||||
boolean isEnrich = Optional
|
boolean isEnrich = Optional
|
||||||
.ofNullable(ENRICHMENT_PROVIDER.get(pType))
|
.ofNullable(ENRICHMENT_PROVIDER.get(pType))
|
||||||
.map(enrich -> enrich.containsKey(collectedFrom.getKey())
|
.map(
|
||||||
|| enrich.containsValue(collectedFrom.getValue()))
|
enrich -> enrich.containsKey(collectedFrom.getKey())
|
||||||
.orElse(false);
|
|| enrich.containsValue(collectedFrom.getValue()))
|
||||||
|
.orElse(false);
|
||||||
|
|
||||||
boolean isAuthority = Optional
|
boolean isAuthority = Optional
|
||||||
.ofNullable(PID_AUTHORITY.get(pType))
|
.ofNullable(PID_AUTHORITY.get(pType))
|
||||||
.map(authorities -> authorities.containsKey(collectedFrom.getKey())
|
.map(
|
||||||
|| authorities.containsValue(collectedFrom.getValue()))
|
authorities -> authorities.containsKey(collectedFrom.getKey())
|
||||||
.orElse(false);
|
|| authorities.containsValue(collectedFrom.getValue()))
|
||||||
|
.orElse(false);
|
||||||
|
|
||||||
return (mapHandles && pType.equals(PidType.handle)) || isEnrich || isAuthority;
|
return (mapHandles && pType.equals(PidType.handle)) || isEnrich || isAuthority;
|
||||||
}
|
}
|
||||||
|
@ -260,12 +264,12 @@ public class IdentifierFactory implements Serializable {
|
||||||
|
|
||||||
public static String idFromPid(String numericPrefix, String pidType, String pidValue, boolean md5) {
|
public static String idFromPid(String numericPrefix, String pidType, String pidValue, boolean md5) {
|
||||||
return new StringBuilder()
|
return new StringBuilder()
|
||||||
.append(numericPrefix)
|
.append(numericPrefix)
|
||||||
.append(ID_PREFIX_SEPARATOR)
|
.append(ID_PREFIX_SEPARATOR)
|
||||||
.append(createPrefix(pidType))
|
.append(createPrefix(pidType))
|
||||||
.append(ID_SEPARATOR)
|
.append(ID_SEPARATOR)
|
||||||
.append(md5 ? md5(pidValue) : pidValue)
|
.append(md5 ? md5(pidValue) : pidValue)
|
||||||
.toString();
|
.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
// create the prefix (length = 12)
|
// create the prefix (length = 12)
|
||||||
|
|
|
@ -1,22 +1,24 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.schema.oaf.utils;
|
package eu.dnetlib.dhp.schema.oaf.utils;
|
||||||
|
|
||||||
import static org.apache.commons.lang3.ObjectUtils.firstNonNull;
|
|
||||||
import static com.google.common.base.Preconditions.checkArgument;
|
import static com.google.common.base.Preconditions.checkArgument;
|
||||||
|
import static org.apache.commons.lang3.ObjectUtils.firstNonNull;
|
||||||
|
|
||||||
import java.text.ParseException;
|
import java.text.ParseException;
|
||||||
|
import java.time.ZoneId;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
import java.util.function.BinaryOperator;
|
import java.util.function.BinaryOperator;
|
||||||
import java.util.function.Function;
|
import java.util.function.Function;
|
||||||
import java.util.function.Supplier;
|
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
import java.util.stream.Stream;
|
import java.util.stream.Stream;
|
||||||
|
|
||||||
import com.google.common.base.Joiner;
|
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
import org.apache.commons.lang3.tuple.ImmutablePair;
|
import org.apache.commons.lang3.tuple.ImmutablePair;
|
||||||
import org.apache.commons.lang3.tuple.Pair;
|
import org.apache.commons.lang3.tuple.Pair;
|
||||||
|
|
||||||
|
import com.github.sisyphsu.dateparser.DateParserUtils;
|
||||||
|
import com.google.common.base.Joiner;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.schema.common.AccessRightComparator;
|
import eu.dnetlib.dhp.schema.common.AccessRightComparator;
|
||||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||||
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
||||||
|
@ -173,7 +175,8 @@ public class MergeUtils {
|
||||||
return a || b;
|
return a || b;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static <T, K> List<T> mergeLists(final List<T> left, final List<T> right, int trust, Function<T, K> keyExtractor, BinaryOperator<T> merger) {
|
private static <T, K> List<T> mergeLists(final List<T> left, final List<T> right, int trust,
|
||||||
|
Function<T, K> keyExtractor, BinaryOperator<T> merger) {
|
||||||
if (left == null) {
|
if (left == null) {
|
||||||
return right;
|
return right;
|
||||||
} else if (right == null) {
|
} else if (right == null) {
|
||||||
|
@ -184,11 +187,11 @@ public class MergeUtils {
|
||||||
List<T> l = trust >= 0 ? right : left;
|
List<T> l = trust >= 0 ? right : left;
|
||||||
|
|
||||||
return new ArrayList<>(Stream
|
return new ArrayList<>(Stream
|
||||||
.concat(h.stream(), l.stream())
|
.concat(h.stream(), l.stream())
|
||||||
.filter(Objects::nonNull)
|
.filter(Objects::nonNull)
|
||||||
.distinct()
|
.distinct()
|
||||||
.collect(Collectors.toMap(keyExtractor, v -> v, merger))
|
.collect(Collectors.toMap(keyExtractor, v -> v, merger))
|
||||||
.values());
|
.values());
|
||||||
}
|
}
|
||||||
|
|
||||||
private static <T, K> List<T> unionDistinctLists(final List<T> left, final List<T> right, int trust) {
|
private static <T, K> List<T> unionDistinctLists(final List<T> left, final List<T> right, int trust) {
|
||||||
|
@ -202,10 +205,10 @@ public class MergeUtils {
|
||||||
List<T> l = trust >= 0 ? right : left;
|
List<T> l = trust >= 0 ? right : left;
|
||||||
|
|
||||||
return Stream
|
return Stream
|
||||||
.concat(h.stream(), l.stream())
|
.concat(h.stream(), l.stream())
|
||||||
.filter(Objects::nonNull)
|
.filter(Objects::nonNull)
|
||||||
.distinct()
|
.distinct()
|
||||||
.collect(Collectors.toList());
|
.collect(Collectors.toList());
|
||||||
}
|
}
|
||||||
|
|
||||||
private static List<String> unionDistinctListOfString(final List<String> l, final List<String> r) {
|
private static List<String> unionDistinctListOfString(final List<String> l, final List<String> r) {
|
||||||
|
@ -402,11 +405,12 @@ public class MergeUtils {
|
||||||
// instance enrichment or union
|
// instance enrichment or union
|
||||||
// review instance equals => add pid to comparision
|
// review instance equals => add pid to comparision
|
||||||
if (!isAnEnrichment(merge) && !isAnEnrichment(enrich))
|
if (!isAnEnrichment(merge) && !isAnEnrichment(enrich))
|
||||||
merge.setInstance(
|
merge
|
||||||
mergeLists(merge.getInstance(), enrich.getInstance(), trust,
|
.setInstance(
|
||||||
MergeUtils::instanceKeyExtractor,
|
mergeLists(
|
||||||
MergeUtils::instanceMerger
|
merge.getInstance(), enrich.getInstance(), trust,
|
||||||
));
|
MergeUtils::instanceKeyExtractor,
|
||||||
|
MergeUtils::instanceMerger));
|
||||||
else {
|
else {
|
||||||
final List<Instance> enrichmentInstances = isAnEnrichment(merge) ? merge.getInstance()
|
final List<Instance> enrichmentInstances = isAnEnrichment(merge) ? merge.getInstance()
|
||||||
: enrich.getInstance();
|
: enrich.getInstance();
|
||||||
|
@ -428,12 +432,103 @@ public class MergeUtils {
|
||||||
}
|
}
|
||||||
|
|
||||||
private static String instanceKeyExtractor(Instance i) {
|
private static String instanceKeyExtractor(Instance i) {
|
||||||
return String.join("::",
|
return String
|
||||||
kvKeyExtractor(i.getHostedby()),
|
.join(
|
||||||
qualifierKeyExtractor(i.getAccessright()),
|
"::",
|
||||||
qualifierKeyExtractor(i.getInstancetype()),
|
kvKeyExtractor(i.getHostedby()),
|
||||||
Optional.ofNullable(i.getUrl()).map(u -> String.join("::", u)).orElse(null),
|
kvKeyExtractor(i.getCollectedfrom()),
|
||||||
Optional.ofNullable(i.getPid()).map(pp -> pp.stream().map(MergeUtils::spKeyExtractor).collect(Collectors.joining("::"))).orElse(null));
|
qualifierKeyExtractor(i.getAccessright()),
|
||||||
|
qualifierKeyExtractor(i.getInstancetype()),
|
||||||
|
Optional.ofNullable(i.getUrl()).map(u -> String.join("::", u)).orElse(null),
|
||||||
|
Optional
|
||||||
|
.ofNullable(i.getPid())
|
||||||
|
.map(pp -> pp.stream().map(MergeUtils::spKeyExtractor).collect(Collectors.joining("::")))
|
||||||
|
.orElse(null));
|
||||||
|
}
|
||||||
|
|
||||||
|
private static Instance instanceMerger(Instance i1, Instance i2) {
|
||||||
|
Instance i = new Instance();
|
||||||
|
i.setHostedby(i1.getHostedby());
|
||||||
|
i.setCollectedfrom(i1.getCollectedfrom());
|
||||||
|
i.setAccessright(i1.getAccessright());
|
||||||
|
i.setInstancetype(i1.getInstancetype());
|
||||||
|
i.setPid(mergeLists(i1.getPid(), i2.getPid(), 0, MergeUtils::spKeyExtractor, (sp1, sp2) -> sp1));
|
||||||
|
i
|
||||||
|
.setAlternateIdentifier(
|
||||||
|
mergeLists(
|
||||||
|
i1.getAlternateIdentifier(), i2.getAlternateIdentifier(), 0, MergeUtils::spKeyExtractor,
|
||||||
|
(sp1, sp2) -> sp1));
|
||||||
|
|
||||||
|
i
|
||||||
|
.setRefereed(
|
||||||
|
Collections
|
||||||
|
.min(
|
||||||
|
Stream.of(i1.getRefereed(), i2.getRefereed()).collect(Collectors.toList()),
|
||||||
|
new RefereedComparator()));
|
||||||
|
i
|
||||||
|
.setInstanceTypeMapping(
|
||||||
|
mergeLists(
|
||||||
|
i1.getInstanceTypeMapping(), i2.getInstanceTypeMapping(), 0,
|
||||||
|
MergeUtils::instanceTypeMappingKeyExtractor, (itm1, itm2) -> itm1));
|
||||||
|
i.setFulltext(selectFulltext(i1.getFulltext(), i2.getFulltext()));
|
||||||
|
i.setDateofacceptance(selectOldestDate(i1.getDateofacceptance(), i2.getDateofacceptance()));
|
||||||
|
i.setLicense(firstNonNull(i1.getLicense(), i2.getLicense()));
|
||||||
|
i.setProcessingchargeamount(firstNonNull(i1.getProcessingchargeamount(), i2.getProcessingchargeamount()));
|
||||||
|
i.setProcessingchargecurrency(firstNonNull(i1.getProcessingchargecurrency(), i2.getProcessingchargecurrency()));
|
||||||
|
i
|
||||||
|
.setMeasures(
|
||||||
|
mergeLists(i1.getMeasures(), i2.getMeasures(), 0, MergeUtils::measureKeyExtractor, (m1, m2) -> m1));
|
||||||
|
|
||||||
|
i.setUrl(unionDistinctListOfString(i1.getUrl(), i2.getUrl()));
|
||||||
|
|
||||||
|
return i;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static String measureKeyExtractor(Measure m) {
|
||||||
|
return String
|
||||||
|
.join(
|
||||||
|
"::",
|
||||||
|
m.getId(),
|
||||||
|
m
|
||||||
|
.getUnit()
|
||||||
|
.stream()
|
||||||
|
.map(KeyValue::getKey)
|
||||||
|
.collect(Collectors.joining("::")));
|
||||||
|
}
|
||||||
|
|
||||||
|
private static Field<String> selectOldestDate(Field<String> d1, Field<String> d2) {
|
||||||
|
return Stream
|
||||||
|
.of(d1, d2)
|
||||||
|
.filter(Objects::nonNull)
|
||||||
|
.min(
|
||||||
|
Comparator
|
||||||
|
.comparing(
|
||||||
|
f -> DateParserUtils
|
||||||
|
.parseDate(f.getValue())
|
||||||
|
.toInstant()
|
||||||
|
.atZone(ZoneId.systemDefault())
|
||||||
|
.toLocalDate()))
|
||||||
|
.orElse(d1);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static String selectFulltext(String ft1, String ft2) {
|
||||||
|
if (StringUtils.endsWith(ft1, "pdf")) {
|
||||||
|
return ft1;
|
||||||
|
}
|
||||||
|
if (StringUtils.endsWith(ft2, "pdf")) {
|
||||||
|
return ft2;
|
||||||
|
}
|
||||||
|
return firstNonNull(ft1, ft2);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static String instanceTypeMappingKeyExtractor(InstanceTypeMapping itm) {
|
||||||
|
return String
|
||||||
|
.join(
|
||||||
|
"::",
|
||||||
|
itm.getOriginalType(),
|
||||||
|
itm.getTypeCode(),
|
||||||
|
itm.getTypeLabel(),
|
||||||
|
itm.getVocabularyName());
|
||||||
}
|
}
|
||||||
|
|
||||||
private static String kvKeyExtractor(KeyValue kv) {
|
private static String kvKeyExtractor(KeyValue kv) {
|
||||||
|
@ -444,22 +539,17 @@ public class MergeUtils {
|
||||||
return Optional.ofNullable(q).map(Qualifier::getClassid).orElse(null);
|
return Optional.ofNullable(q).map(Qualifier::getClassid).orElse(null);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static <T> T FieldKeyExtractor(Field<T> f) {
|
private static <T> T fieldKeyExtractor(Field<T> f) {
|
||||||
return Optional.ofNullable(f).map(Field::getValue).orElse(null);
|
return Optional.ofNullable(f).map(Field::getValue).orElse(null);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static String spKeyExtractor(StructuredProperty sp) {
|
private static String spKeyExtractor(StructuredProperty sp) {
|
||||||
return Optional.ofNullable(sp).map(s -> Joiner.on("::").join(s, qualifierKeyExtractor(s.getQualifier()))).orElse(null);
|
return Optional
|
||||||
|
.ofNullable(sp)
|
||||||
|
.map(s -> Joiner.on("::").join(s, qualifierKeyExtractor(s.getQualifier())))
|
||||||
|
.orElse(null);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static Instance instanceMerger(Instance i1, Instance i2) {
|
|
||||||
|
|
||||||
// TODO implement me!
|
|
||||||
|
|
||||||
return i1;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
private static <T extends OtherResearchProduct> T mergeORP(T original, T enrich) {
|
private static <T extends OtherResearchProduct> T mergeORP(T original, T enrich) {
|
||||||
int trust = compareTrust(original, enrich);
|
int trust = compareTrust(original, enrich);
|
||||||
final T merge = mergeResult(original, enrich);
|
final T merge = mergeResult(original, enrich);
|
||||||
|
|
|
@ -3,7 +3,8 @@ package eu.dnetlib.dhp.schema.oaf.utils;
|
||||||
|
|
||||||
public class ModelHardLimits {
|
public class ModelHardLimits {
|
||||||
|
|
||||||
private ModelHardLimits() {}
|
private ModelHardLimits() {
|
||||||
|
}
|
||||||
|
|
||||||
public static final String LAYOUT = "index";
|
public static final String LAYOUT = "index";
|
||||||
public static final String INTERPRETATION = "openaire";
|
public static final String INTERPRETATION = "openaire";
|
||||||
|
|
|
@ -1,10 +1,10 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.schema.oaf.utils;
|
package eu.dnetlib.dhp.schema.oaf.utils;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
|
||||||
|
|
||||||
import java.util.Comparator;
|
import java.util.Comparator;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
||||||
|
|
||||||
public class OrganizationPidComparator implements Comparator<StructuredProperty> {
|
public class OrganizationPidComparator implements Comparator<StructuredProperty> {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -1,14 +1,15 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.schema.oaf.utils;
|
package eu.dnetlib.dhp.schema.oaf.utils;
|
||||||
|
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
|
||||||
import org.apache.commons.io.IOUtils;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
import java.util.Optional;
|
import java.util.Optional;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
|
||||||
public class PidBlacklistProvider {
|
public class PidBlacklistProvider {
|
||||||
|
|
||||||
private static final PidBlacklist blacklist;
|
private static final PidBlacklist blacklist;
|
||||||
|
@ -33,6 +34,7 @@ public class PidBlacklistProvider {
|
||||||
.orElse(new HashSet<>());
|
.orElse(new HashSet<>());
|
||||||
}
|
}
|
||||||
|
|
||||||
private PidBlacklistProvider() {}
|
private PidBlacklistProvider() {
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,14 +1,14 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.schema.oaf.utils;
|
package eu.dnetlib.dhp.schema.oaf.utils;
|
||||||
|
|
||||||
|
import java.util.Comparator;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
||||||
import eu.dnetlib.dhp.schema.oaf.OafEntity;
|
import eu.dnetlib.dhp.schema.oaf.OafEntity;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Organization;
|
import eu.dnetlib.dhp.schema.oaf.Organization;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||||
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
||||||
|
|
||||||
import java.util.Comparator;
|
|
||||||
|
|
||||||
public class PidComparator<T extends OafEntity> implements Comparator<StructuredProperty> {
|
public class PidComparator<T extends OafEntity> implements Comparator<StructuredProperty> {
|
||||||
|
|
||||||
private final T entity;
|
private final T entity;
|
||||||
|
|
|
@ -1,11 +1,11 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.schema.oaf.utils;
|
package eu.dnetlib.dhp.schema.oaf.utils;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
|
||||||
|
|
||||||
import java.util.Comparator;
|
import java.util.Comparator;
|
||||||
import java.util.Optional;
|
import java.util.Optional;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
||||||
|
|
||||||
public class PidValueComparator implements Comparator<StructuredProperty> {
|
public class PidValueComparator implements Comparator<StructuredProperty> {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -0,0 +1,39 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.schema.oaf.utils;
|
||||||
|
|
||||||
|
import java.util.Comparator;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Qualifier;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Comparator for sorting the values from the dnet:review_levels vocabulary, implements the following ordering
|
||||||
|
*
|
||||||
|
* peerReviewed (0001) > nonPeerReviewed (0002) > UNKNOWN (0000)
|
||||||
|
*/
|
||||||
|
public class RefereedComparator implements Comparator<Qualifier> {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int compare(Qualifier left, Qualifier right) {
|
||||||
|
|
||||||
|
String lClass = left.getClassid();
|
||||||
|
String rClass = right.getClassid();
|
||||||
|
|
||||||
|
if ("0001".equals(lClass))
|
||||||
|
return -1;
|
||||||
|
if ("0001".equals(rClass))
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
if ("0002".equals(lClass))
|
||||||
|
return -1;
|
||||||
|
if ("0002".equals(rClass))
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
if ("0000".equals(lClass))
|
||||||
|
return -1;
|
||||||
|
if ("0000".equals(rClass))
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
|
@ -1,10 +1,10 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.schema.oaf.utils;
|
package eu.dnetlib.dhp.schema.oaf.utils;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
|
||||||
|
|
||||||
import java.util.Comparator;
|
import java.util.Comparator;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
||||||
|
|
||||||
public class ResultPidComparator implements Comparator<StructuredProperty> {
|
public class ResultPidComparator implements Comparator<StructuredProperty> {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -1,16 +1,16 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.schema.oaf.utils;
|
package eu.dnetlib.dhp.schema.oaf.utils;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.CROSSREF_ID;
|
||||||
import eu.dnetlib.dhp.schema.oaf.KeyValue;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
|
||||||
|
|
||||||
import java.util.Comparator;
|
import java.util.Comparator;
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
import java.util.Optional;
|
import java.util.Optional;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.CROSSREF_ID;
|
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.KeyValue;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||||
|
|
||||||
public class ResultTypeComparator implements Comparator<Result> {
|
public class ResultTypeComparator implements Comparator<Result> {
|
||||||
|
|
||||||
|
|
|
@ -1,11 +1,11 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.schema.oaf.utils;
|
package eu.dnetlib.dhp.schema.oaf.utils;
|
||||||
|
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
import org.junit.jupiter.api.Assertions;
|
import org.junit.jupiter.api.Assertions;
|
||||||
import org.junit.jupiter.api.Test;
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
import java.util.Set;
|
|
||||||
|
|
||||||
class BlackListProviderTest {
|
class BlackListProviderTest {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
|
|
@ -1,16 +1,18 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.schema.oaf.utils;
|
package eu.dnetlib.dhp.schema.oaf.utils;
|
||||||
|
|
||||||
import com.fasterxml.jackson.databind.DeserializationFeature;
|
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
import static org.junit.jupiter.api.Assertions.assertNotNull;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Publication;
|
|
||||||
import org.apache.commons.io.IOUtils;
|
|
||||||
import org.junit.jupiter.api.Test;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
||||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
import org.apache.commons.io.IOUtils;
|
||||||
import static org.junit.jupiter.api.Assertions.assertNotNull;
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.databind.DeserializationFeature;
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Publication;
|
||||||
|
|
||||||
class IdentifierFactoryTest {
|
class IdentifierFactoryTest {
|
||||||
|
|
||||||
|
@ -42,7 +44,7 @@ class IdentifierFactoryTest {
|
||||||
"publication_pmc2.json", "50|pmc_________::94e4cb08c93f8733b48e2445d04002ac", true);
|
"publication_pmc2.json", "50|pmc_________::94e4cb08c93f8733b48e2445d04002ac", true);
|
||||||
|
|
||||||
verifyIdentifier(
|
verifyIdentifier(
|
||||||
"publication_openapc.json", "50|doi_________::79dbc7a2a56dc1532659f9038843256e", true);
|
"publication_openapc.json", "50|doi_________::79dbc7a2a56dc1532659f9038843256e", true);
|
||||||
|
|
||||||
final String defaultID = "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f";
|
final String defaultID = "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f";
|
||||||
verifyIdentifier("publication_3.json", defaultID, true);
|
verifyIdentifier("publication_3.json", defaultID, true);
|
||||||
|
@ -69,7 +71,7 @@ class IdentifierFactoryTest {
|
||||||
@Test
|
@Test
|
||||||
void testCreateIdentifierForROHub() throws IOException {
|
void testCreateIdentifierForROHub() throws IOException {
|
||||||
verifyIdentifier(
|
verifyIdentifier(
|
||||||
"orp-rohub.json", "50|w3id________::afc7592914ae190a50570db90f55f9c2", true);
|
"orp-rohub.json", "50|w3id________::afc7592914ae190a50570db90f55f9c2", true);
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void verifyIdentifier(String filename, String expectedID, boolean md5) throws IOException {
|
protected void verifyIdentifier(String filename, String expectedID, boolean md5) throws IOException {
|
||||||
|
|
|
@ -10,24 +10,23 @@ import java.util.HashSet;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
import com.google.common.collect.Lists;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.*;
|
|
||||||
import org.apache.commons.beanutils.BeanUtils;
|
import org.apache.commons.beanutils.BeanUtils;
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.junit.jupiter.api.Test;
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
import com.fasterxml.jackson.databind.DeserializationFeature;
|
import com.fasterxml.jackson.databind.DeserializationFeature;
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
import com.google.common.collect.Lists;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||||
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.*;
|
||||||
|
|
||||||
public class MergeUtilsTest {
|
public class MergeUtilsTest {
|
||||||
|
|
||||||
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper()
|
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper()
|
||||||
.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
|
.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
|
||||||
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
void testMergePubs_new() throws IOException {
|
void testMergePubs_new() throws IOException {
|
||||||
Publication pt = read("publication_test.json", Publication.class);
|
Publication pt = read("publication_test.json", Publication.class);
|
||||||
|
|
|
@ -178,10 +178,10 @@ class OafMapperUtilsTest {
|
||||||
|
|
||||||
assertEquals(
|
assertEquals(
|
||||||
ModelConstants.DATASET_RESULTTYPE_CLASSID,
|
ModelConstants.DATASET_RESULTTYPE_CLASSID,
|
||||||
((Result) MergeUtils
|
((Result) MergeUtils
|
||||||
.merge(p2, d1))
|
.merge(p2, d1))
|
||||||
.getResulttype()
|
.getResulttype()
|
||||||
.getClassid());
|
.getClassid());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
|
Loading…
Reference in New Issue