forked from D-Net/dnet-hadoop
refactoring: CleaningFunctions and OafMapperUtils moved to dhp-common
This commit is contained in:
parent
8471888ad3
commit
86d6fbe95b
|
@ -1,8 +1,9 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.oa.graph.clean;
|
package eu.dnetlib.dhp.schema.oaf;
|
||||||
|
|
||||||
import java.util.LinkedHashMap;
|
import java.util.LinkedHashMap;
|
||||||
import java.util.Objects;
|
import java.util.Objects;
|
||||||
|
import java.util.Optional;
|
||||||
import java.util.function.Function;
|
import java.util.function.Function;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
@ -10,13 +11,13 @@ import org.apache.commons.lang3.StringUtils;
|
||||||
|
|
||||||
import com.clearspring.analytics.util.Lists;
|
import com.clearspring.analytics.util.Lists;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.oa.graph.raw.AbstractMdRecordToOafMapper;
|
|
||||||
import eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils;
|
|
||||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||||
import eu.dnetlib.dhp.schema.oaf.*;
|
import eu.dnetlib.dhp.schema.oaf.*;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory;
|
||||||
|
|
||||||
public class CleaningFunctions {
|
public class CleaningFunctions {
|
||||||
|
|
||||||
|
public static final String DOI_URL_PREFIX = "^http(s?):\\/\\/(dx\\.)?doi\\.org\\/";
|
||||||
public static final String ORCID_PREFIX_REGEX = "^http(s?):\\/\\/orcid\\.org\\/";
|
public static final String ORCID_PREFIX_REGEX = "^http(s?):\\/\\/orcid\\.org\\/";
|
||||||
public static final String NONE = "none";
|
public static final String NONE = "none";
|
||||||
|
|
||||||
|
@ -72,7 +73,7 @@ public class CleaningFunctions {
|
||||||
return value;
|
return value;
|
||||||
}
|
}
|
||||||
|
|
||||||
protected static <T extends Oaf> T fixDefaults(T value) {
|
public static <T extends Oaf> T fixDefaults(T value) {
|
||||||
if (value instanceof Datasource) {
|
if (value instanceof Datasource) {
|
||||||
// nothing to clean here
|
// nothing to clean here
|
||||||
} else if (value instanceof Project) {
|
} else if (value instanceof Project) {
|
||||||
|
@ -118,10 +119,7 @@ public class CleaningFunctions {
|
||||||
.filter(sp -> NONE.equalsIgnoreCase(sp.getValue()))
|
.filter(sp -> NONE.equalsIgnoreCase(sp.getValue()))
|
||||||
.filter(sp -> Objects.nonNull(sp.getQualifier()))
|
.filter(sp -> Objects.nonNull(sp.getQualifier()))
|
||||||
.filter(sp -> StringUtils.isNotBlank(sp.getQualifier().getClassid()))
|
.filter(sp -> StringUtils.isNotBlank(sp.getQualifier().getClassid()))
|
||||||
.map(sp -> {
|
.map(CleaningFunctions::normalizePidValue)
|
||||||
sp.setValue(StringUtils.trim(sp.getValue()));
|
|
||||||
return sp;
|
|
||||||
})
|
|
||||||
.collect(Collectors.toList()));
|
.collect(Collectors.toList()));
|
||||||
}
|
}
|
||||||
if (Objects.isNull(r.getResourcetype()) || StringUtils.isBlank(r.getResourcetype().getClassid())) {
|
if (Objects.isNull(r.getResourcetype()) || StringUtils.isBlank(r.getResourcetype().getClassid())) {
|
||||||
|
@ -143,7 +141,7 @@ public class CleaningFunctions {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (Objects.isNull(r.getBestaccessright()) || StringUtils.isBlank(r.getBestaccessright().getClassid())) {
|
if (Objects.isNull(r.getBestaccessright()) || StringUtils.isBlank(r.getBestaccessright().getClassid())) {
|
||||||
Qualifier bestaccessrights = AbstractMdRecordToOafMapper.createBestAccessRights(r.getInstance());
|
Qualifier bestaccessrights = OafMapperUtils.createBestAccessRights(r.getInstance());
|
||||||
if (Objects.isNull(bestaccessrights)) {
|
if (Objects.isNull(bestaccessrights)) {
|
||||||
r
|
r
|
||||||
.setBestaccessright(
|
.setBestaccessright(
|
||||||
|
@ -219,4 +217,24 @@ public class CleaningFunctions {
|
||||||
classid, classname, scheme, scheme);
|
classid, classname, scheme, scheme);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Utility method that normalises PID values on a per-type basis.
|
||||||
|
* @param pid the PID whose value will be normalised.
|
||||||
|
* @return the PID containing the normalised value.
|
||||||
|
*/
|
||||||
|
public static StructuredProperty normalizePidValue(StructuredProperty pid) {
|
||||||
|
String value = Optional
|
||||||
|
.ofNullable(pid.getValue())
|
||||||
|
.map(String::trim)
|
||||||
|
.orElseThrow(() -> new IllegalArgumentException("PID value cannot be empty"));
|
||||||
|
switch (pid.getQualifier().getClassid()) {
|
||||||
|
|
||||||
|
// TODO add cleaning for more PID types as needed
|
||||||
|
case "doi":
|
||||||
|
pid.setValue(value.toLowerCase().replaceAll(DOI_URL_PREFIX, ""));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
return pid;
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
|
@ -1,11 +1,10 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.oa.graph.raw.common;
|
package eu.dnetlib.dhp.schema.oaf;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
|
||||||
import java.util.Arrays;
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_ACCESS_MODES;
|
||||||
import java.util.List;
|
|
||||||
import java.util.Map;
|
import java.util.*;
|
||||||
import java.util.Objects;
|
|
||||||
import java.util.concurrent.ConcurrentHashMap;
|
import java.util.concurrent.ConcurrentHashMap;
|
||||||
import java.util.function.Function;
|
import java.util.function.Function;
|
||||||
import java.util.function.Predicate;
|
import java.util.function.Predicate;
|
||||||
|
@ -13,15 +12,7 @@ import java.util.stream.Collectors;
|
||||||
|
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.DataInfo;
|
import eu.dnetlib.dhp.schema.common.LicenseComparator;
|
||||||
import eu.dnetlib.dhp.schema.oaf.ExtraInfo;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Field;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Journal;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.KeyValue;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.OAIProvenance;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.OriginDescription;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Qualifier;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
|
||||||
import eu.dnetlib.dhp.utils.DHPUtils;
|
import eu.dnetlib.dhp.utils.DHPUtils;
|
||||||
|
|
||||||
public class OafMapperUtils {
|
public class OafMapperUtils {
|
||||||
|
@ -270,4 +261,36 @@ public class OafMapperUtils {
|
||||||
final Map<Object, Boolean> seen = new ConcurrentHashMap<>();
|
final Map<Object, Boolean> seen = new ConcurrentHashMap<>();
|
||||||
return t -> seen.putIfAbsent(keyExtractor.apply(t), Boolean.TRUE) == null;
|
return t -> seen.putIfAbsent(keyExtractor.apply(t), Boolean.TRUE) == null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static Qualifier createBestAccessRights(final List<Instance> instanceList) {
|
||||||
|
return getBestAccessRights(instanceList);
|
||||||
|
}
|
||||||
|
|
||||||
|
protected static Qualifier getBestAccessRights(final List<Instance> instanceList) {
|
||||||
|
if (instanceList != null) {
|
||||||
|
final Optional<Qualifier> min = instanceList
|
||||||
|
.stream()
|
||||||
|
.map(i -> i.getAccessright())
|
||||||
|
.min(new LicenseComparator());
|
||||||
|
|
||||||
|
final Qualifier rights = min.isPresent() ? min.get() : new Qualifier();
|
||||||
|
|
||||||
|
if (StringUtils.isBlank(rights.getClassid())) {
|
||||||
|
rights.setClassid(UNKNOWN);
|
||||||
|
}
|
||||||
|
if (StringUtils.isBlank(rights.getClassname())
|
||||||
|
|| UNKNOWN.equalsIgnoreCase(rights.getClassname())) {
|
||||||
|
rights.setClassname(NOT_AVAILABLE);
|
||||||
|
}
|
||||||
|
if (StringUtils.isBlank(rights.getSchemeid())) {
|
||||||
|
rights.setSchemeid(DNET_ACCESS_MODES);
|
||||||
|
}
|
||||||
|
if (StringUtils.isBlank(rights.getSchemename())) {
|
||||||
|
rights.setSchemename(DNET_ACCESS_MODES);
|
||||||
|
}
|
||||||
|
|
||||||
|
return rights;
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
}
|
}
|
|
@ -1,3 +1,4 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.schema.oaf;
|
package eu.dnetlib.dhp.schema.oaf;
|
||||||
|
|
||||||
import java.util.Comparator;
|
import java.util.Comparator;
|
||||||
|
@ -46,4 +47,3 @@ public class ResultTypeComparator implements Comparator<Result> {
|
||||||
return lClass.compareTo(rClass);
|
return lClass.compareTo(rClass);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -7,6 +7,7 @@ import java.util.Optional;
|
||||||
|
|
||||||
import org.apache.commons.lang.StringUtils;
|
import org.apache.commons.lang.StringUtils;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.CleaningFunctions;
|
||||||
import eu.dnetlib.dhp.schema.oaf.OafEntity;
|
import eu.dnetlib.dhp.schema.oaf.OafEntity;
|
||||||
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
||||||
import eu.dnetlib.dhp.utils.DHPUtils;
|
import eu.dnetlib.dhp.utils.DHPUtils;
|
||||||
|
@ -16,8 +17,6 @@ import eu.dnetlib.dhp.utils.DHPUtils;
|
||||||
*/
|
*/
|
||||||
public class IdentifierFactory implements Serializable {
|
public class IdentifierFactory implements Serializable {
|
||||||
|
|
||||||
public static final String DOI_URL_PREFIX = "^http(s?):\\/\\/(dx\\.)?doi\\.org\\/";
|
|
||||||
|
|
||||||
public static final String ID_SEPARATOR = "::";
|
public static final String ID_SEPARATOR = "::";
|
||||||
public static final String ID_PREFIX_SEPARATOR = "|";
|
public static final String ID_PREFIX_SEPARATOR = "|";
|
||||||
public final static String ID_REGEX = "^[0-9][0-9]\\" + ID_PREFIX_SEPARATOR + ".{12}" + ID_SEPARATOR
|
public final static String ID_REGEX = "^[0-9][0-9]\\" + ID_PREFIX_SEPARATOR + ".{12}" + ID_SEPARATOR
|
||||||
|
@ -55,26 +54,6 @@ public class IdentifierFactory implements Serializable {
|
||||||
!NONE.equals(StringUtils.trim(StringUtils.lowerCase(s.getValue())));
|
!NONE.equals(StringUtils.trim(StringUtils.lowerCase(s.getValue())));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Utility method that normalises PID values on a per-type basis.
|
|
||||||
* @param pid the PID whose value will be normalised.
|
|
||||||
* @return the PID containing the normalised value.
|
|
||||||
*/
|
|
||||||
public static StructuredProperty normalizePidValue(StructuredProperty pid) {
|
|
||||||
String value = Optional
|
|
||||||
.ofNullable(pid.getValue())
|
|
||||||
.map(String::trim)
|
|
||||||
.orElseThrow(() -> new IllegalArgumentException("PID value cannot be empty"));
|
|
||||||
switch (pid.getQualifier().getClassid()) {
|
|
||||||
|
|
||||||
// TODO add cleaning for more PID types as needed
|
|
||||||
case "doi":
|
|
||||||
pid.setValue(value.toLowerCase().replaceAll(DOI_URL_PREFIX, ""));
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
return pid;
|
|
||||||
}
|
|
||||||
|
|
||||||
private static String verifyIdSyntax(String s) {
|
private static String verifyIdSyntax(String s) {
|
||||||
if (StringUtils.isBlank(s) || !s.matches(ID_REGEX)) {
|
if (StringUtils.isBlank(s) || !s.matches(ID_REGEX)) {
|
||||||
throw new RuntimeException(String.format("malformed id: '%s'", s));
|
throw new RuntimeException(String.format("malformed id: '%s'", s));
|
||||||
|
@ -89,7 +68,7 @@ public class IdentifierFactory implements Serializable {
|
||||||
.append(ID_PREFIX_SEPARATOR)
|
.append(ID_PREFIX_SEPARATOR)
|
||||||
.append(createPrefix(s.getQualifier().getClassid()))
|
.append(createPrefix(s.getQualifier().getClassid()))
|
||||||
.append(ID_SEPARATOR)
|
.append(ID_SEPARATOR)
|
||||||
.append(DHPUtils.md5(normalizePidValue(s).getValue()))
|
.append(DHPUtils.md5(CleaningFunctions.normalizePidValue(s).getValue()))
|
||||||
.toString();
|
.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -3,13 +3,9 @@ package eu.dnetlib.dhp.oa.graph.clean;
|
||||||
|
|
||||||
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||||
|
|
||||||
import java.io.BufferedInputStream;
|
|
||||||
import java.util.Objects;
|
|
||||||
import java.util.Optional;
|
import java.util.Optional;
|
||||||
import java.util.stream.Collectors;
|
|
||||||
|
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.apache.commons.lang3.StringUtils;
|
|
||||||
import org.apache.spark.SparkConf;
|
import org.apache.spark.SparkConf;
|
||||||
import org.apache.spark.api.java.function.MapFunction;
|
import org.apache.spark.api.java.function.MapFunction;
|
||||||
import org.apache.spark.sql.Dataset;
|
import org.apache.spark.sql.Dataset;
|
||||||
|
@ -23,10 +19,7 @@ import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
import eu.dnetlib.dhp.common.HdfsSupport;
|
import eu.dnetlib.dhp.common.HdfsSupport;
|
||||||
import eu.dnetlib.dhp.oa.graph.raw.AbstractMdRecordToOafMapper;
|
|
||||||
import eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils;
|
|
||||||
import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup;
|
import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup;
|
||||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.*;
|
import eu.dnetlib.dhp.schema.oaf.*;
|
||||||
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
|
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
|
||||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
||||||
|
|
|
@ -1,8 +1,8 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.oa.graph.raw;
|
package eu.dnetlib.dhp.oa.graph.raw;
|
||||||
|
|
||||||
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.*;
|
|
||||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
|
||||||
|
import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.*;
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
|
@ -282,7 +282,7 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
r.setExternalReference(new ArrayList<>()); // NOT PRESENT IN MDSTORES
|
r.setExternalReference(new ArrayList<>()); // NOT PRESENT IN MDSTORES
|
||||||
|
|
||||||
r.setInstance(instances);
|
r.setInstance(instances);
|
||||||
r.setBestaccessright(getBestAccessRights(instances));
|
r.setBestaccessright(OafMapperUtils.createBestAccessRights(instances));
|
||||||
}
|
}
|
||||||
|
|
||||||
protected abstract List<StructuredProperty> prepareResultPids(Document doc, DataInfo info);
|
protected abstract List<StructuredProperty> prepareResultPids(Document doc, DataInfo info);
|
||||||
|
@ -367,38 +367,6 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
|
|
||||||
protected abstract Field<String> prepareDatasetStorageDate(Document doc, DataInfo info);
|
protected abstract Field<String> prepareDatasetStorageDate(Document doc, DataInfo info);
|
||||||
|
|
||||||
public static Qualifier createBestAccessRights(final List<Instance> instanceList) {
|
|
||||||
return getBestAccessRights(instanceList);
|
|
||||||
}
|
|
||||||
|
|
||||||
protected static Qualifier getBestAccessRights(final List<Instance> instanceList) {
|
|
||||||
if (instanceList != null) {
|
|
||||||
final Optional<Qualifier> min = instanceList
|
|
||||||
.stream()
|
|
||||||
.map(i -> i.getAccessright())
|
|
||||||
.min(new LicenseComparator());
|
|
||||||
|
|
||||||
final Qualifier rights = min.isPresent() ? min.get() : new Qualifier();
|
|
||||||
|
|
||||||
if (StringUtils.isBlank(rights.getClassid())) {
|
|
||||||
rights.setClassid(UNKNOWN);
|
|
||||||
}
|
|
||||||
if (StringUtils.isBlank(rights.getClassname())
|
|
||||||
|| UNKNOWN.equalsIgnoreCase(rights.getClassname())) {
|
|
||||||
rights.setClassname(NOT_AVAILABLE);
|
|
||||||
}
|
|
||||||
if (StringUtils.isBlank(rights.getSchemeid())) {
|
|
||||||
rights.setSchemeid(DNET_ACCESS_MODES);
|
|
||||||
}
|
|
||||||
if (StringUtils.isBlank(rights.getSchemename())) {
|
|
||||||
rights.setSchemename(DNET_ACCESS_MODES);
|
|
||||||
}
|
|
||||||
|
|
||||||
return rights;
|
|
||||||
}
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
private Journal prepareJournal(final Document doc, final DataInfo info) {
|
private Journal prepareJournal(final Document doc, final DataInfo info) {
|
||||||
final Node n = doc.selectSingleNode("//oaf:journal");
|
final Node n = doc.selectSingleNode("//oaf:journal");
|
||||||
if (n != null) {
|
if (n != null) {
|
||||||
|
|
|
@ -1,16 +1,6 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.oa.graph.raw;
|
package eu.dnetlib.dhp.oa.graph.raw;
|
||||||
|
|
||||||
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.asString;
|
|
||||||
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.createOpenaireId;
|
|
||||||
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.dataInfo;
|
|
||||||
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.field;
|
|
||||||
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.journal;
|
|
||||||
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.listFields;
|
|
||||||
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.listKeyValues;
|
|
||||||
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.qualifier;
|
|
||||||
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.structuredProperty;
|
|
||||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.DATASET_DEFAULT_RESULTTYPE;
|
|
||||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.DATASOURCE_ORGANIZATION;
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.DATASOURCE_ORGANIZATION;
|
||||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_PROVENANCE_ACTIONS;
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_PROVENANCE_ACTIONS;
|
||||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.ENTITYREGISTRY_PROVENANCE_ACTION;
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.ENTITYREGISTRY_PROVENANCE_ACTION;
|
||||||
|
@ -19,19 +9,25 @@ import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_PARTICIPANT;
|
||||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_PRODUCED_BY;
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_PRODUCED_BY;
|
||||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_PROVIDED_BY;
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_PROVIDED_BY;
|
||||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_RELATED_TO;
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_RELATED_TO;
|
||||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.ORP_DEFAULT_RESULTTYPE;
|
|
||||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.OUTCOME;
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.OUTCOME;
|
||||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.PARTICIPATION;
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.PARTICIPATION;
|
||||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.PRODUCES;
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.PRODUCES;
|
||||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.PROJECT_ORGANIZATION;
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.PROJECT_ORGANIZATION;
|
||||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.PROVIDES;
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.PROVIDES;
|
||||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.PROVISION;
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.PROVISION;
|
||||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.PUBLICATION_DEFAULT_RESULTTYPE;
|
|
||||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.RELATIONSHIP;
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.RELATIONSHIP;
|
||||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.RESULT_PROJECT;
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.RESULT_PROJECT;
|
||||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.RESULT_RESULT;
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.RESULT_RESULT;
|
||||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.SOFTWARE_DEFAULT_RESULTTYPE;
|
|
||||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.USER_CLAIM;
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.USER_CLAIM;
|
||||||
|
import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.asString;
|
||||||
|
import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.createOpenaireId;
|
||||||
|
import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.dataInfo;
|
||||||
|
import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.field;
|
||||||
|
import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.journal;
|
||||||
|
import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.listFields;
|
||||||
|
import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.listKeyValues;
|
||||||
|
import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.qualifier;
|
||||||
|
import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.structuredProperty;
|
||||||
|
|
||||||
import java.io.Closeable;
|
import java.io.Closeable;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
|
@ -1,14 +1,13 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.oa.graph.raw;
|
package eu.dnetlib.dhp.oa.graph.raw;
|
||||||
|
|
||||||
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.createOpenaireId;
|
|
||||||
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.field;
|
|
||||||
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.structuredProperty;
|
|
||||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
|
||||||
|
import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.createOpenaireId;
|
||||||
|
import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.field;
|
||||||
|
import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.structuredProperty;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Optional;
|
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
@ -293,7 +292,7 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper {
|
||||||
return prepareListStructPropsWithValidQualifier(
|
return prepareListStructPropsWithValidQualifier(
|
||||||
doc, "//oaf:identifier", "@identifierType", DNET_PID_TYPES, info)
|
doc, "//oaf:identifier", "@identifierType", DNET_PID_TYPES, info)
|
||||||
.stream()
|
.stream()
|
||||||
.map(IdentifierFactory::normalizePidValue)
|
.map(CleaningFunctions::normalizePidValue)
|
||||||
.collect(Collectors.toList());
|
.collect(Collectors.toList());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,10 +1,10 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.oa.graph.raw;
|
package eu.dnetlib.dhp.oa.graph.raw;
|
||||||
|
|
||||||
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.createOpenaireId;
|
|
||||||
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.field;
|
|
||||||
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.structuredProperty;
|
|
||||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
|
||||||
|
import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.createOpenaireId;
|
||||||
|
import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.field;
|
||||||
|
import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.structuredProperty;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
@ -17,11 +17,8 @@ import org.apache.commons.lang3.StringUtils;
|
||||||
import org.dom4j.Document;
|
import org.dom4j.Document;
|
||||||
import org.dom4j.Node;
|
import org.dom4j.Node;
|
||||||
|
|
||||||
import com.google.common.collect.Lists;
|
|
||||||
|
|
||||||
import eu.dnetlib.dhp.common.PacePerson;
|
import eu.dnetlib.dhp.common.PacePerson;
|
||||||
import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup;
|
import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup;
|
||||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.*;
|
import eu.dnetlib.dhp.schema.oaf.*;
|
||||||
import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory;
|
import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory;
|
||||||
|
|
||||||
|
@ -382,7 +379,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
|
||||||
|
|
||||||
return res
|
return res
|
||||||
.stream()
|
.stream()
|
||||||
.map(IdentifierFactory::normalizePidValue)
|
.map(CleaningFunctions::normalizePidValue)
|
||||||
.collect(Collectors.toList());
|
.collect(Collectors.toList());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -10,6 +10,7 @@ import org.apache.commons.lang3.StringUtils;
|
||||||
|
|
||||||
import com.google.common.collect.Maps;
|
import com.google.common.collect.Maps;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.OafMapperUtils;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Qualifier;
|
import eu.dnetlib.dhp.schema.oaf.Qualifier;
|
||||||
|
|
||||||
public class Vocabulary implements Serializable {
|
public class Vocabulary implements Serializable {
|
||||||
|
|
|
@ -7,6 +7,7 @@ import java.util.stream.Collectors;
|
||||||
|
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.OafMapperUtils;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Qualifier;
|
import eu.dnetlib.dhp.schema.oaf.Qualifier;
|
||||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
||||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
||||||
|
|
|
@ -7,8 +7,6 @@ import static org.mockito.Mockito.lenient;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
import java.util.function.Predicate;
|
|
||||||
import java.util.stream.Collectors;
|
|
||||||
import java.util.stream.Stream;
|
import java.util.stream.Stream;
|
||||||
|
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
|
|
|
@ -1,11 +1,13 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.oa.graph.raw;
|
package eu.dnetlib.dhp.oa.graph.raw;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.oa.graph.clean.CleaningFunctionTest;
|
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||||
import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup;
|
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
import static org.mockito.Mockito.lenient;
|
||||||
import eu.dnetlib.dhp.schema.oaf.*;
|
|
||||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
import java.io.IOException;
|
||||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
import java.util.List;
|
||||||
|
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.junit.jupiter.api.BeforeEach;
|
import org.junit.jupiter.api.BeforeEach;
|
||||||
import org.junit.jupiter.api.Test;
|
import org.junit.jupiter.api.Test;
|
||||||
|
@ -13,12 +15,12 @@ import org.junit.jupiter.api.extension.ExtendWith;
|
||||||
import org.mockito.Mock;
|
import org.mockito.Mock;
|
||||||
import org.mockito.junit.jupiter.MockitoExtension;
|
import org.mockito.junit.jupiter.MockitoExtension;
|
||||||
|
|
||||||
import java.io.IOException;
|
import eu.dnetlib.dhp.oa.graph.clean.CleaningFunctionTest;
|
||||||
import java.util.List;
|
import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup;
|
||||||
|
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
import eu.dnetlib.dhp.schema.oaf.*;
|
||||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
||||||
import static org.mockito.Mockito.lenient;
|
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
||||||
|
|
||||||
@ExtendWith(MockitoExtension.class)
|
@ExtendWith(MockitoExtension.class)
|
||||||
public class GenerateEntitiesApplicationTest {
|
public class GenerateEntitiesApplicationTest {
|
||||||
|
@ -66,7 +68,8 @@ public class GenerateEntitiesApplicationTest {
|
||||||
verifyMerge(orp, software, Software.class, ModelConstants.SOFTWARE_RESULTTYPE_CLASSID);
|
verifyMerge(orp, software, Software.class, ModelConstants.SOFTWARE_RESULTTYPE_CLASSID);
|
||||||
}
|
}
|
||||||
|
|
||||||
protected <T extends Result> void verifyMerge(Result publication, Result dataset, Class<T> clazz, String resultType) {
|
protected <T extends Result> void verifyMerge(Result publication, Result dataset, Class<T> clazz,
|
||||||
|
String resultType) {
|
||||||
final Result merge = GenerateEntitiesApplication.mergeResults(publication, dataset);
|
final Result merge = GenerateEntitiesApplication.mergeResults(publication, dataset);
|
||||||
assertTrue(clazz.isAssignableFrom(merge.getClass()));
|
assertTrue(clazz.isAssignableFrom(merge.getClass()));
|
||||||
assertEquals(resultType, merge.getResulttype().getClassid());
|
assertEquals(resultType, merge.getResulttype().getClassid());
|
||||||
|
@ -83,7 +86,6 @@ public class GenerateEntitiesApplicationTest {
|
||||||
.get();
|
.get();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private List<String> vocs() throws IOException {
|
private List<String> vocs() throws IOException {
|
||||||
return IOUtils
|
return IOUtils
|
||||||
.readLines(CleaningFunctionTest.class.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/terms.txt"));
|
.readLines(CleaningFunctionTest.class.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/terms.txt"));
|
||||||
|
|
|
@ -27,10 +27,10 @@ import org.mockito.junit.jupiter.MockitoExtension;
|
||||||
import com.fasterxml.jackson.core.type.TypeReference;
|
import com.fasterxml.jackson.core.type.TypeReference;
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils;
|
|
||||||
import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup;
|
import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Datasource;
|
import eu.dnetlib.dhp.schema.oaf.Datasource;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Oaf;
|
import eu.dnetlib.dhp.schema.oaf.Oaf;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.OafMapperUtils;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Organization;
|
import eu.dnetlib.dhp.schema.oaf.Organization;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Project;
|
import eu.dnetlib.dhp.schema.oaf.Project;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Relation;
|
import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||||
|
|
Loading…
Reference in New Issue