From 86d6fbe95b177c28a0952685abd5f1fee0232d46 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Tue, 3 Nov 2020 12:19:46 +0100 Subject: [PATCH] refactoring: CleaningFunctions and OafMapperUtils moved in dhp-commong --- .../dhp/schema/oaf}/CleaningFunctions.java | 56 +++++--- .../dhp/schema/oaf}/OafMapperUtils.java | 53 +++++-- .../dhp/schema/oaf/ResultTypeComparator.java | 64 ++++----- .../schema/oaf/utils/IdentifierFactory.java | 31 +--- .../oa/graph/clean/CleanGraphSparkJob.java | 7 - .../raw/AbstractMdRecordToOafMapper.java | 36 +---- .../raw/MigrateDbEntitiesApplication.java | 22 ++- .../dhp/oa/graph/raw/OafToOafMapper.java | 9 +- .../dhp/oa/graph/raw/OdfToOafMapper.java | 11 +- .../dhp/oa/graph/raw/common/Vocabulary.java | 1 + .../oa/graph/raw/common/VocabularyGroup.java | 1 + .../oa/graph/clean/CleaningFunctionTest.java | 2 - .../raw/GenerateEntitiesApplicationTest.java | 136 +++++++++--------- .../raw/MigrateDbEntitiesApplicationTest.java | 2 +- 14 files changed, 203 insertions(+), 228 deletions(-) rename {dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean => dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf}/CleaningFunctions.java (81%) rename {dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common => dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf}/OafMapperUtils.java (84%) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningFunctions.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/CleaningFunctions.java similarity index 81% rename from dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningFunctions.java rename to dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/CleaningFunctions.java index 12543d8c7..1f2b47cfb 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningFunctions.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/CleaningFunctions.java @@ -1,8 +1,9 @@ -package eu.dnetlib.dhp.oa.graph.clean; +package eu.dnetlib.dhp.schema.oaf; import java.util.LinkedHashMap; import java.util.Objects; +import java.util.Optional; import java.util.function.Function; import java.util.stream.Collectors; @@ -10,13 +11,13 @@ import org.apache.commons.lang3.StringUtils; import com.clearspring.analytics.util.Lists; -import eu.dnetlib.dhp.oa.graph.raw.AbstractMdRecordToOafMapper; -import eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; public class CleaningFunctions { + public static final String DOI_URL_PREFIX = "^http(s?):\\/\\/(dx\\.)?doi\\.org\\/"; public static final String ORCID_PREFIX_REGEX = "^http(s?):\\/\\/orcid\\.org\\/"; public static final String NONE = "none"; @@ -72,7 +73,7 @@ public class CleaningFunctions { return value; } - protected static T fixDefaults(T value) { + public static T fixDefaults(T value) { if (value instanceof Datasource) { // nothing to clean here } else if (value instanceof Project) { @@ -109,20 +110,17 @@ public class CleaningFunctions { } if (Objects.nonNull(r.getPid())) { r - .setPid( - r - .getPid() - .stream() - .filter(Objects::nonNull) - .filter(sp -> StringUtils.isNotBlank(StringUtils.trim(sp.getValue()))) - .filter(sp -> NONE.equalsIgnoreCase(sp.getValue())) - .filter(sp -> Objects.nonNull(sp.getQualifier())) - .filter(sp -> StringUtils.isNotBlank(sp.getQualifier().getClassid())) - .map(sp -> { - sp.setValue(StringUtils.trim(sp.getValue())); - return sp; - }) - .collect(Collectors.toList())); + .setPid( + r + .getPid() + .stream() + .filter(Objects::nonNull) + .filter(sp -> StringUtils.isNotBlank(StringUtils.trim(sp.getValue()))) + .filter(sp -> NONE.equalsIgnoreCase(sp.getValue())) + .filter(sp -> Objects.nonNull(sp.getQualifier())) + .filter(sp -> StringUtils.isNotBlank(sp.getQualifier().getClassid())) + .map(CleaningFunctions::normalizePidValue) + .collect(Collectors.toList())); } if (Objects.isNull(r.getResourcetype()) || StringUtils.isBlank(r.getResourcetype().getClassid())) { r @@ -143,7 +141,7 @@ public class CleaningFunctions { } } if (Objects.isNull(r.getBestaccessright()) || StringUtils.isBlank(r.getBestaccessright().getClassid())) { - Qualifier bestaccessrights = AbstractMdRecordToOafMapper.createBestAccessRights(r.getInstance()); + Qualifier bestaccessrights = OafMapperUtils.createBestAccessRights(r.getInstance()); if (Objects.isNull(bestaccessrights)) { r .setBestaccessright( @@ -219,4 +217,24 @@ public class CleaningFunctions { classid, classname, scheme, scheme); } + /** + * Utility method that normalises PID values on a per-type basis. + * @param pid the PID whose value will be normalised. + * @return the PID containing the normalised value. + */ + public static StructuredProperty normalizePidValue(StructuredProperty pid) { + String value = Optional + .ofNullable(pid.getValue()) + .map(String::trim) + .orElseThrow(() -> new IllegalArgumentException("PID value cannot be empty")); + switch (pid.getQualifier().getClassid()) { + + // TODO add cleaning for more PID types as needed + case "doi": + pid.setValue(value.toLowerCase().replaceAll(DOI_URL_PREFIX, "")); + break; + } + return pid; + } + } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/OafMapperUtils.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/OafMapperUtils.java similarity index 84% rename from dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/OafMapperUtils.java rename to dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/OafMapperUtils.java index 84b29e3d4..f079c55af 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/OafMapperUtils.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/OafMapperUtils.java @@ -1,11 +1,10 @@ -package eu.dnetlib.dhp.oa.graph.raw.common; +package eu.dnetlib.dhp.schema.oaf; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.Map; -import java.util.Objects; +import static eu.dnetlib.dhp.schema.common.ModelConstants.*; +import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_ACCESS_MODES; + +import java.util.*; import java.util.concurrent.ConcurrentHashMap; import java.util.function.Function; import java.util.function.Predicate; @@ -13,15 +12,7 @@ import java.util.stream.Collectors; import org.apache.commons.lang3.StringUtils; -import eu.dnetlib.dhp.schema.oaf.DataInfo; -import eu.dnetlib.dhp.schema.oaf.ExtraInfo; -import eu.dnetlib.dhp.schema.oaf.Field; -import eu.dnetlib.dhp.schema.oaf.Journal; -import eu.dnetlib.dhp.schema.oaf.KeyValue; -import eu.dnetlib.dhp.schema.oaf.OAIProvenance; -import eu.dnetlib.dhp.schema.oaf.OriginDescription; -import eu.dnetlib.dhp.schema.oaf.Qualifier; -import eu.dnetlib.dhp.schema.oaf.StructuredProperty; +import eu.dnetlib.dhp.schema.common.LicenseComparator; import eu.dnetlib.dhp.utils.DHPUtils; public class OafMapperUtils { @@ -270,4 +261,36 @@ public class OafMapperUtils { final Map seen = new ConcurrentHashMap<>(); return t -> seen.putIfAbsent(keyExtractor.apply(t), Boolean.TRUE) == null; } + + public static Qualifier createBestAccessRights(final List instanceList) { + return getBestAccessRights(instanceList); + } + + protected static Qualifier getBestAccessRights(final List instanceList) { + if (instanceList != null) { + final Optional min = instanceList + .stream() + .map(i -> i.getAccessright()) + .min(new LicenseComparator()); + + final Qualifier rights = min.isPresent() ? min.get() : new Qualifier(); + + if (StringUtils.isBlank(rights.getClassid())) { + rights.setClassid(UNKNOWN); + } + if (StringUtils.isBlank(rights.getClassname()) + || UNKNOWN.equalsIgnoreCase(rights.getClassname())) { + rights.setClassname(NOT_AVAILABLE); + } + if (StringUtils.isBlank(rights.getSchemeid())) { + rights.setSchemeid(DNET_ACCESS_MODES); + } + if (StringUtils.isBlank(rights.getSchemename())) { + rights.setSchemename(DNET_ACCESS_MODES); + } + + return rights; + } + return null; + } } diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/ResultTypeComparator.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/ResultTypeComparator.java index 11bc7297b..6c11d1a85 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/ResultTypeComparator.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/ResultTypeComparator.java @@ -1,3 +1,4 @@ + package eu.dnetlib.dhp.schema.oaf; import java.util.Comparator; @@ -6,44 +7,43 @@ import eu.dnetlib.dhp.schema.common.ModelConstants; public class ResultTypeComparator implements Comparator { - @Override - public int compare(Result left, Result right) { + @Override + public int compare(Result left, Result right) { - if (left == null && right == null) - return 0; - if (left == null) - return 1; - if (right == null) - return -1; + if (left == null && right == null) + return 0; + if (left == null) + return 1; + if (right == null) + return -1; - String lClass = left.getResulttype().getClassid(); - String rClass = right.getResulttype().getClassid(); + String lClass = left.getResulttype().getClassid(); + String rClass = right.getResulttype().getClassid(); - if (lClass.equals(rClass)) - return 0; + if (lClass.equals(rClass)) + return 0; - if (lClass.equals(ModelConstants.PUBLICATION_RESULTTYPE_CLASSID)) - return -1; - if (rClass.equals(ModelConstants.PUBLICATION_RESULTTYPE_CLASSID)) - return 1; + if (lClass.equals(ModelConstants.PUBLICATION_RESULTTYPE_CLASSID)) + return -1; + if (rClass.equals(ModelConstants.PUBLICATION_RESULTTYPE_CLASSID)) + return 1; - if (lClass.equals(ModelConstants.DATASET_RESULTTYPE_CLASSID)) - return -1; - if (rClass.equals(ModelConstants.DATASET_RESULTTYPE_CLASSID)) - return 1; + if (lClass.equals(ModelConstants.DATASET_RESULTTYPE_CLASSID)) + return -1; + if (rClass.equals(ModelConstants.DATASET_RESULTTYPE_CLASSID)) + return 1; - if (lClass.equals(ModelConstants.SOFTWARE_RESULTTYPE_CLASSID)) - return -1; - if (rClass.equals(ModelConstants.SOFTWARE_RESULTTYPE_CLASSID)) - return 1; + if (lClass.equals(ModelConstants.SOFTWARE_RESULTTYPE_CLASSID)) + return -1; + if (rClass.equals(ModelConstants.SOFTWARE_RESULTTYPE_CLASSID)) + return 1; - if (lClass.equals(ModelConstants.ORP_RESULTTYPE_CLASSID)) - return -1; - if (rClass.equals(ModelConstants.ORP_RESULTTYPE_CLASSID)) - return 1; + if (lClass.equals(ModelConstants.ORP_RESULTTYPE_CLASSID)) + return -1; + if (rClass.equals(ModelConstants.ORP_RESULTTYPE_CLASSID)) + return 1; - // Else (but unlikely), lexicographical ordering will do. - return lClass.compareTo(rClass); - } + // Else (but unlikely), lexicographical ordering will do. + return lClass.compareTo(rClass); + } } - diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactory.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactory.java index c57a7c135..6692b4223 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactory.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactory.java @@ -7,6 +7,7 @@ import java.util.Optional; import org.apache.commons.lang.StringUtils; +import eu.dnetlib.dhp.schema.oaf.CleaningFunctions; import eu.dnetlib.dhp.schema.oaf.OafEntity; import eu.dnetlib.dhp.schema.oaf.StructuredProperty; import eu.dnetlib.dhp.utils.DHPUtils; @@ -16,8 +17,6 @@ import eu.dnetlib.dhp.utils.DHPUtils; */ public class IdentifierFactory implements Serializable { - public static final String DOI_URL_PREFIX = "^http(s?):\\/\\/(dx\\.)?doi\\.org\\/"; - public static final String ID_SEPARATOR = "::"; public static final String ID_PREFIX_SEPARATOR = "|"; public final static String ID_REGEX = "^[0-9][0-9]\\" + ID_PREFIX_SEPARATOR + ".{12}" + ID_SEPARATOR @@ -50,29 +49,9 @@ public class IdentifierFactory implements Serializable { protected static boolean pidFilter(StructuredProperty s) { return Objects.nonNull(s.getQualifier()) && - PidType.isValid(s.getQualifier().getClassid()) && - StringUtils.isNotBlank(StringUtils.trim(s.getValue())) && - !NONE.equals(StringUtils.trim(StringUtils.lowerCase(s.getValue()))); - } - - /** - * Utility method that normalises PID values on a per-type basis. - * @param pid the PID whose value will be normalised. - * @return the PID containing the normalised value. - */ - public static StructuredProperty normalizePidValue(StructuredProperty pid) { - String value = Optional - .ofNullable(pid.getValue()) - .map(String::trim) - .orElseThrow(() -> new IllegalArgumentException("PID value cannot be empty")); - switch (pid.getQualifier().getClassid()) { - - // TODO add cleaning for more PID types as needed - case "doi": - pid.setValue(value.toLowerCase().replaceAll(DOI_URL_PREFIX, "")); - break; - } - return pid; + PidType.isValid(s.getQualifier().getClassid()) && + StringUtils.isNotBlank(StringUtils.trim(s.getValue())) && + !NONE.equals(StringUtils.trim(StringUtils.lowerCase(s.getValue()))); } private static String verifyIdSyntax(String s) { @@ -89,7 +68,7 @@ public class IdentifierFactory implements Serializable { .append(ID_PREFIX_SEPARATOR) .append(createPrefix(s.getQualifier().getClassid())) .append(ID_SEPARATOR) - .append(DHPUtils.md5(normalizePidValue(s).getValue())) + .append(DHPUtils.md5(CleaningFunctions.normalizePidValue(s).getValue())) .toString(); } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleanGraphSparkJob.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleanGraphSparkJob.java index e295b9503..a15c2ee62 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleanGraphSparkJob.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleanGraphSparkJob.java @@ -3,13 +3,9 @@ package eu.dnetlib.dhp.oa.graph.clean; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; -import java.io.BufferedInputStream; -import java.util.Objects; import java.util.Optional; -import java.util.stream.Collectors; import org.apache.commons.io.IOUtils; -import org.apache.commons.lang3.StringUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.sql.Dataset; @@ -23,10 +19,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.common.HdfsSupport; -import eu.dnetlib.dhp.oa.graph.raw.AbstractMdRecordToOafMapper; -import eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils; import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup; -import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.utils.ISLookupClientFactory; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java index 57e0aa49e..051225857 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java @@ -1,8 +1,8 @@ package eu.dnetlib.dhp.oa.graph.raw; -import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.*; import static eu.dnetlib.dhp.schema.common.ModelConstants.*; +import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.*; import java.util.*; @@ -282,7 +282,7 @@ public abstract class AbstractMdRecordToOafMapper { r.setExternalReference(new ArrayList<>()); // NOT PRESENT IN MDSTORES r.setInstance(instances); - r.setBestaccessright(getBestAccessRights(instances)); + r.setBestaccessright(OafMapperUtils.createBestAccessRights(instances)); } protected abstract List prepareResultPids(Document doc, DataInfo info); @@ -367,38 +367,6 @@ public abstract class AbstractMdRecordToOafMapper { protected abstract Field prepareDatasetStorageDate(Document doc, DataInfo info); - public static Qualifier createBestAccessRights(final List instanceList) { - return getBestAccessRights(instanceList); - } - - protected static Qualifier getBestAccessRights(final List instanceList) { - if (instanceList != null) { - final Optional min = instanceList - .stream() - .map(i -> i.getAccessright()) - .min(new LicenseComparator()); - - final Qualifier rights = min.isPresent() ? min.get() : new Qualifier(); - - if (StringUtils.isBlank(rights.getClassid())) { - rights.setClassid(UNKNOWN); - } - if (StringUtils.isBlank(rights.getClassname()) - || UNKNOWN.equalsIgnoreCase(rights.getClassname())) { - rights.setClassname(NOT_AVAILABLE); - } - if (StringUtils.isBlank(rights.getSchemeid())) { - rights.setSchemeid(DNET_ACCESS_MODES); - } - if (StringUtils.isBlank(rights.getSchemename())) { - rights.setSchemename(DNET_ACCESS_MODES); - } - - return rights; - } - return null; - } - private Journal prepareJournal(final Document doc, final DataInfo info) { final Node n = doc.selectSingleNode("//oaf:journal"); if (n != null) { diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java index 30fdd17e9..16b0aa80b 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java @@ -1,16 +1,6 @@ package eu.dnetlib.dhp.oa.graph.raw; -import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.asString; -import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.createOpenaireId; -import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.dataInfo; -import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.field; -import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.journal; -import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.listFields; -import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.listKeyValues; -import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.qualifier; -import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.structuredProperty; -import static eu.dnetlib.dhp.schema.common.ModelConstants.DATASET_DEFAULT_RESULTTYPE; import static eu.dnetlib.dhp.schema.common.ModelConstants.DATASOURCE_ORGANIZATION; import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_PROVENANCE_ACTIONS; import static eu.dnetlib.dhp.schema.common.ModelConstants.ENTITYREGISTRY_PROVENANCE_ACTION; @@ -19,19 +9,25 @@ import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_PARTICIPANT; import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_PRODUCED_BY; import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_PROVIDED_BY; import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_RELATED_TO; -import static eu.dnetlib.dhp.schema.common.ModelConstants.ORP_DEFAULT_RESULTTYPE; import static eu.dnetlib.dhp.schema.common.ModelConstants.OUTCOME; import static eu.dnetlib.dhp.schema.common.ModelConstants.PARTICIPATION; import static eu.dnetlib.dhp.schema.common.ModelConstants.PRODUCES; import static eu.dnetlib.dhp.schema.common.ModelConstants.PROJECT_ORGANIZATION; import static eu.dnetlib.dhp.schema.common.ModelConstants.PROVIDES; import static eu.dnetlib.dhp.schema.common.ModelConstants.PROVISION; -import static eu.dnetlib.dhp.schema.common.ModelConstants.PUBLICATION_DEFAULT_RESULTTYPE; import static eu.dnetlib.dhp.schema.common.ModelConstants.RELATIONSHIP; import static eu.dnetlib.dhp.schema.common.ModelConstants.RESULT_PROJECT; import static eu.dnetlib.dhp.schema.common.ModelConstants.RESULT_RESULT; -import static eu.dnetlib.dhp.schema.common.ModelConstants.SOFTWARE_DEFAULT_RESULTTYPE; import static eu.dnetlib.dhp.schema.common.ModelConstants.USER_CLAIM; +import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.asString; +import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.createOpenaireId; +import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.dataInfo; +import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.field; +import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.journal; +import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.listFields; +import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.listKeyValues; +import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.qualifier; +import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.structuredProperty; import java.io.Closeable; import java.io.IOException; diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java index 4813a202b..a1798e4fb 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java @@ -1,14 +1,13 @@ package eu.dnetlib.dhp.oa.graph.raw; -import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.createOpenaireId; -import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.field; -import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.structuredProperty; import static eu.dnetlib.dhp.schema.common.ModelConstants.*; +import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.createOpenaireId; +import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.field; +import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.structuredProperty; import java.util.ArrayList; import java.util.List; -import java.util.Optional; import java.util.stream.Collectors; import org.apache.commons.lang3.StringUtils; @@ -293,7 +292,7 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper { return prepareListStructPropsWithValidQualifier( doc, "//oaf:identifier", "@identifierType", DNET_PID_TYPES, info) .stream() - .map(IdentifierFactory::normalizePidValue) + .map(CleaningFunctions::normalizePidValue) .collect(Collectors.toList()); } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java index d819de1cb..b7ee2d546 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java @@ -1,10 +1,10 @@ package eu.dnetlib.dhp.oa.graph.raw; -import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.createOpenaireId; -import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.field; -import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.structuredProperty; import static eu.dnetlib.dhp.schema.common.ModelConstants.*; +import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.createOpenaireId; +import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.field; +import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.structuredProperty; import java.util.ArrayList; import java.util.Arrays; @@ -17,11 +17,8 @@ import org.apache.commons.lang3.StringUtils; import org.dom4j.Document; import org.dom4j.Node; -import com.google.common.collect.Lists; - import eu.dnetlib.dhp.common.PacePerson; import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup; -import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; @@ -382,7 +379,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { return res .stream() - .map(IdentifierFactory::normalizePidValue) + .map(CleaningFunctions::normalizePidValue) .collect(Collectors.toList()); } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/Vocabulary.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/Vocabulary.java index 9bf198c8b..bfc4fd6f1 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/Vocabulary.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/Vocabulary.java @@ -10,6 +10,7 @@ import org.apache.commons.lang3.StringUtils; import com.google.common.collect.Maps; +import eu.dnetlib.dhp.schema.oaf.OafMapperUtils; import eu.dnetlib.dhp.schema.oaf.Qualifier; public class Vocabulary implements Serializable { diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/VocabularyGroup.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/VocabularyGroup.java index 334339d3b..32452bdc5 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/VocabularyGroup.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/VocabularyGroup.java @@ -7,6 +7,7 @@ import java.util.stream.Collectors; import org.apache.commons.lang3.StringUtils; +import eu.dnetlib.dhp.schema.oaf.OafMapperUtils; import eu.dnetlib.dhp.schema.oaf.Qualifier; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/CleaningFunctionTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/CleaningFunctionTest.java index 8a53c3a50..5ad8f2ac7 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/CleaningFunctionTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/CleaningFunctionTest.java @@ -7,8 +7,6 @@ import static org.mockito.Mockito.lenient; import java.io.IOException; import java.util.List; import java.util.Set; -import java.util.function.Predicate; -import java.util.stream.Collectors; import java.util.stream.Stream; import org.apache.commons.io.IOUtils; diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplicationTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplicationTest.java index 372d26076..a028738ea 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplicationTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplicationTest.java @@ -1,11 +1,13 @@ + package eu.dnetlib.dhp.oa.graph.raw; -import eu.dnetlib.dhp.oa.graph.clean.CleaningFunctionTest; -import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup; -import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.oaf.*; -import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; -import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.Mockito.lenient; + +import java.io.IOException; +import java.util.List; + import org.apache.commons.io.IOUtils; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -13,85 +15,85 @@ import org.junit.jupiter.api.extension.ExtendWith; import org.mockito.Mock; import org.mockito.junit.jupiter.MockitoExtension; -import java.io.IOException; -import java.util.List; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertTrue; -import static org.mockito.Mockito.lenient; +import eu.dnetlib.dhp.oa.graph.clean.CleaningFunctionTest; +import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup; +import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; +import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; @ExtendWith(MockitoExtension.class) public class GenerateEntitiesApplicationTest { - @Mock - private ISLookUpService isLookUpService; + @Mock + private ISLookUpService isLookUpService; - @Mock - private VocabularyGroup vocs; + @Mock + private VocabularyGroup vocs; - @BeforeEach - public void setUp() throws IOException, ISLookUpException { + @BeforeEach + public void setUp() throws IOException, ISLookUpException { - lenient().when(isLookUpService.quickSearchProfile(VocabularyGroup.VOCABULARIES_XQUERY)).thenReturn(vocs()); - lenient() - .when(isLookUpService.quickSearchProfile(VocabularyGroup.VOCABULARY_SYNONYMS_XQUERY)) - .thenReturn(synonyms()); + lenient().when(isLookUpService.quickSearchProfile(VocabularyGroup.VOCABULARIES_XQUERY)).thenReturn(vocs()); + lenient() + .when(isLookUpService.quickSearchProfile(VocabularyGroup.VOCABULARY_SYNONYMS_XQUERY)) + .thenReturn(synonyms()); - vocs = VocabularyGroup.loadVocsFromIS(isLookUpService); - } + vocs = VocabularyGroup.loadVocsFromIS(isLookUpService); + } - @Test - public void testMergeResult() throws IOException { - Result publication = getResult("oaf_record.xml", Publication.class); - Result dataset = getResult("odf_dataset.xml", Dataset.class); - Result software = getResult("odf_software.xml", Software.class); - Result orp = getResult("oaf_orp.xml", OtherResearchProduct.class); + @Test + public void testMergeResult() throws IOException { + Result publication = getResult("oaf_record.xml", Publication.class); + Result dataset = getResult("odf_dataset.xml", Dataset.class); + Result software = getResult("odf_software.xml", Software.class); + Result orp = getResult("oaf_orp.xml", OtherResearchProduct.class); - verifyMerge(publication, dataset, Publication.class, ModelConstants.PUBLICATION_RESULTTYPE_CLASSID); - verifyMerge(dataset, publication, Publication.class, ModelConstants.PUBLICATION_RESULTTYPE_CLASSID); + verifyMerge(publication, dataset, Publication.class, ModelConstants.PUBLICATION_RESULTTYPE_CLASSID); + verifyMerge(dataset, publication, Publication.class, ModelConstants.PUBLICATION_RESULTTYPE_CLASSID); - verifyMerge(publication, software, Publication.class, ModelConstants.PUBLICATION_RESULTTYPE_CLASSID); - verifyMerge(software, publication, Publication.class, ModelConstants.PUBLICATION_RESULTTYPE_CLASSID); + verifyMerge(publication, software, Publication.class, ModelConstants.PUBLICATION_RESULTTYPE_CLASSID); + verifyMerge(software, publication, Publication.class, ModelConstants.PUBLICATION_RESULTTYPE_CLASSID); - verifyMerge(publication, orp, Publication.class, ModelConstants.PUBLICATION_RESULTTYPE_CLASSID); - verifyMerge(orp, publication, Publication.class, ModelConstants.PUBLICATION_RESULTTYPE_CLASSID); + verifyMerge(publication, orp, Publication.class, ModelConstants.PUBLICATION_RESULTTYPE_CLASSID); + verifyMerge(orp, publication, Publication.class, ModelConstants.PUBLICATION_RESULTTYPE_CLASSID); - verifyMerge(dataset, software, Dataset.class, ModelConstants.DATASET_RESULTTYPE_CLASSID); - verifyMerge(software, dataset, Dataset.class, ModelConstants.DATASET_RESULTTYPE_CLASSID); + verifyMerge(dataset, software, Dataset.class, ModelConstants.DATASET_RESULTTYPE_CLASSID); + verifyMerge(software, dataset, Dataset.class, ModelConstants.DATASET_RESULTTYPE_CLASSID); - verifyMerge(dataset, orp, Dataset.class, ModelConstants.DATASET_RESULTTYPE_CLASSID); - verifyMerge(orp, dataset, Dataset.class, ModelConstants.DATASET_RESULTTYPE_CLASSID); + verifyMerge(dataset, orp, Dataset.class, ModelConstants.DATASET_RESULTTYPE_CLASSID); + verifyMerge(orp, dataset, Dataset.class, ModelConstants.DATASET_RESULTTYPE_CLASSID); - verifyMerge(software, orp, Software.class, ModelConstants.SOFTWARE_RESULTTYPE_CLASSID); - verifyMerge(orp, software, Software.class, ModelConstants.SOFTWARE_RESULTTYPE_CLASSID); - } + verifyMerge(software, orp, Software.class, ModelConstants.SOFTWARE_RESULTTYPE_CLASSID); + verifyMerge(orp, software, Software.class, ModelConstants.SOFTWARE_RESULTTYPE_CLASSID); + } - protected void verifyMerge(Result publication, Result dataset, Class clazz, String resultType) { - final Result merge = GenerateEntitiesApplication.mergeResults(publication, dataset); - assertTrue(clazz.isAssignableFrom(merge.getClass())); - assertEquals(resultType, merge.getResulttype().getClassid()); - } + protected void verifyMerge(Result publication, Result dataset, Class clazz, + String resultType) { + final Result merge = GenerateEntitiesApplication.mergeResults(publication, dataset); + assertTrue(clazz.isAssignableFrom(merge.getClass())); + assertEquals(resultType, merge.getResulttype().getClassid()); + } - protected Result getResult(String xmlFileName, Class clazz) throws IOException { - final String xml = IOUtils.toString(getClass().getResourceAsStream(xmlFileName)); - return new OdfToOafMapper(vocs, false) - .processMdRecord(xml) - .stream() - .filter(s -> clazz.isAssignableFrom(s.getClass())) - .map(s -> (Result) s) - .findFirst() - .get(); - } + protected Result getResult(String xmlFileName, Class clazz) throws IOException { + final String xml = IOUtils.toString(getClass().getResourceAsStream(xmlFileName)); + return new OdfToOafMapper(vocs, false) + .processMdRecord(xml) + .stream() + .filter(s -> clazz.isAssignableFrom(s.getClass())) + .map(s -> (Result) s) + .findFirst() + .get(); + } + private List vocs() throws IOException { + return IOUtils + .readLines(CleaningFunctionTest.class.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/terms.txt")); + } - private List vocs() throws IOException { - return IOUtils - .readLines(CleaningFunctionTest.class.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/terms.txt")); - } - - private List synonyms() throws IOException { - return IOUtils - .readLines(CleaningFunctionTest.class.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/synonyms.txt")); - } + private List synonyms() throws IOException { + return IOUtils + .readLines(CleaningFunctionTest.class.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/synonyms.txt")); + } } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplicationTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplicationTest.java index f663d6095..3c0e2ce8e 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplicationTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplicationTest.java @@ -27,10 +27,10 @@ import org.mockito.junit.jupiter.MockitoExtension; import com.fasterxml.jackson.core.type.TypeReference; import com.fasterxml.jackson.databind.ObjectMapper; -import eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils; import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup; import eu.dnetlib.dhp.schema.oaf.Datasource; import eu.dnetlib.dhp.schema.oaf.Oaf; +import eu.dnetlib.dhp.schema.oaf.OafMapperUtils; import eu.dnetlib.dhp.schema.oaf.Organization; import eu.dnetlib.dhp.schema.oaf.Project; import eu.dnetlib.dhp.schema.oaf.Relation;