diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/vocabulary/Vocabulary.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/vocabulary/Vocabulary.java index 1e333e93f..a9daede8f 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/vocabulary/Vocabulary.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/vocabulary/Vocabulary.java @@ -10,8 +10,8 @@ import org.apache.commons.lang3.StringUtils; import com.google.common.collect.Maps; -import eu.dnetlib.dhp.schema.oaf.OafMapperUtils; import eu.dnetlib.dhp.schema.oaf.Qualifier; +import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; public class Vocabulary implements Serializable { diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/vocabulary/VocabularyGroup.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/vocabulary/VocabularyGroup.java index 12c6279e5..a89bb486f 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/vocabulary/VocabularyGroup.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/vocabulary/VocabularyGroup.java @@ -7,8 +7,8 @@ import java.util.stream.Collectors; import org.apache.commons.lang3.StringUtils; -import eu.dnetlib.dhp.schema.oaf.OafMapperUtils; import eu.dnetlib.dhp.schema.oaf.Qualifier; +import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/CleaningFunctions.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/CleaningFunctions.java deleted file mode 100644 index 673bee314..000000000 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/CleaningFunctions.java +++ /dev/null @@ -1,399 +0,0 @@ - -package eu.dnetlib.dhp.schema.oaf; - -import java.util.*; -import java.util.function.Function; -import java.util.stream.Collectors; -import java.util.stream.Stream; - -import org.apache.commons.lang3.StringUtils; - -import com.clearspring.analytics.util.Lists; -import com.google.common.collect.Sets; - -import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.oaf.utils.PidBlacklistProvider; - -public class CleaningFunctions { - - public static final String DOI_PREFIX_REGEX = "(^10\\.|\\/10.)"; - public static final String DOI_PREFIX = "10."; - - public static final String ORCID_PREFIX_REGEX = "^http(s?):\\/\\/orcid\\.org\\/"; - public static final String CLEANING_REGEX = "(?:\\n|\\r|\\t)"; - - public static final Set PID_BLACKLIST = new HashSet<>(); - public static final String INVALID_AUTHOR_REGEX = ".*deactivated.*"; - public static final String TITLE_FILTER_REGEX = "[.*test.*\\W\\d]"; - public static final int TITLE_FILTER_RESIDUAL_LENGTH = 10; - - static { - PID_BLACKLIST.add("none"); - PID_BLACKLIST.add("na"); - } - - public static T fixVocabularyNames(T value) { - if (value instanceof Datasource) { - // nothing to clean here - } else if (value instanceof Project) { - // nothing to clean here - } else if (value instanceof Organization) { - Organization o = (Organization) value; - if (Objects.nonNull(o.getCountry())) { - fixVocabName(o.getCountry(), ModelConstants.DNET_COUNTRY_TYPE); - } - } else if (value instanceof Relation) { - // nothing to clean here - } else if (value instanceof Result) { - - Result r = (Result) value; - - fixVocabName(r.getLanguage(), ModelConstants.DNET_LANGUAGES); - fixVocabName(r.getResourcetype(), ModelConstants.DNET_DATA_CITE_RESOURCE); - fixVocabName(r.getBestaccessright(), ModelConstants.DNET_ACCESS_MODES); - - if (Objects.nonNull(r.getSubject())) { - r.getSubject().forEach(s -> fixVocabName(s.getQualifier(), ModelConstants.DNET_SUBJECT_TYPOLOGIES)); - } - if (Objects.nonNull(r.getInstance())) { - for (Instance i : r.getInstance()) { - fixVocabName(i.getAccessright(), ModelConstants.DNET_ACCESS_MODES); - fixVocabName(i.getRefereed(), ModelConstants.DNET_REVIEW_LEVELS); - } - } - if (Objects.nonNull(r.getAuthor())) { - r.getAuthor().stream().filter(Objects::nonNull).forEach(a -> { - if (Objects.nonNull(a.getPid())) { - a.getPid().stream().filter(Objects::nonNull).forEach(p -> { - fixVocabName(p.getQualifier(), ModelConstants.DNET_PID_TYPES); - }); - } - }); - } - if (value instanceof Publication) { - - } else if (value instanceof eu.dnetlib.dhp.schema.oaf.Dataset) { - - } else if (value instanceof OtherResearchProduct) { - - } else if (value instanceof Software) { - - } - } - - return value; - } - - public static boolean filter(T value) { - if (value instanceof Datasource) { - // nothing to evaluate here - } else if (value instanceof Project) { - // nothing to evaluate here - } else if (value instanceof Organization) { - // nothing to evaluate here - } else if (value instanceof Relation) { - // nothing to clean here - } else if (value instanceof Result) { - - Result r = (Result) value; - - if (Objects.nonNull(r.getTitle()) && r.getTitle().isEmpty()) { - return false; - } - - if (value instanceof Publication) { - - } else if (value instanceof eu.dnetlib.dhp.schema.oaf.Dataset) { - - } else if (value instanceof OtherResearchProduct) { - - } else if (value instanceof Software) { - - } - } - return true; - } - - public static T cleanup(T value) { - if (value instanceof Datasource) { - // nothing to clean here - } else if (value instanceof Project) { - // nothing to clean here - } else if (value instanceof Organization) { - Organization o = (Organization) value; - if (Objects.isNull(o.getCountry()) || StringUtils.isBlank(o.getCountry().getClassid())) { - o.setCountry(ModelConstants.UNKNOWN_COUNTRY); - } - } else if (value instanceof Relation) { - // nothing to clean here - } else if (value instanceof Result) { - - Result r = (Result) value; - if (Objects.nonNull(r.getPublisher()) && StringUtils.isBlank(r.getPublisher().getValue())) { - r.setPublisher(null); - } - if (Objects.isNull(r.getLanguage()) || StringUtils.isBlank(r.getLanguage().getClassid())) { - r - .setLanguage( - qualifier("und", "Undetermined", ModelConstants.DNET_LANGUAGES)); - } - if (Objects.nonNull(r.getSubject())) { - r - .setSubject( - r - .getSubject() - .stream() - .filter(Objects::nonNull) - .filter(sp -> StringUtils.isNotBlank(sp.getValue())) - .filter(sp -> Objects.nonNull(sp.getQualifier())) - .filter(sp -> StringUtils.isNotBlank(sp.getQualifier().getClassid())) - .map(CleaningFunctions::cleanValue) - .collect(Collectors.toList())); - } - if (Objects.nonNull(r.getTitle())) { - r - .setTitle( - r - .getTitle() - .stream() - .filter(Objects::nonNull) - .filter(sp -> StringUtils.isNotBlank(sp.getValue())) - .filter( - sp -> sp - .getValue() - .toLowerCase() - .replaceAll(TITLE_FILTER_REGEX, "") - .length() > TITLE_FILTER_RESIDUAL_LENGTH) - .map(CleaningFunctions::cleanValue) - .collect(Collectors.toList())); - } - if (Objects.nonNull(r.getDescription())) { - r - .setDescription( - r - .getDescription() - .stream() - .filter(Objects::nonNull) - .filter(sp -> StringUtils.isNotBlank(sp.getValue())) - .map(CleaningFunctions::cleanValue) - .collect(Collectors.toList())); - } - if (Objects.nonNull(r.getPid())) { - r.setPid(processPidCleaning(r.getPid())); - } - if (Objects.isNull(r.getResourcetype()) || StringUtils.isBlank(r.getResourcetype().getClassid())) { - r - .setResourcetype( - qualifier(ModelConstants.UNKNOWN, "Unknown", ModelConstants.DNET_DATA_CITE_RESOURCE)); - } - if (Objects.nonNull(r.getInstance())) { - - for (Instance i : r.getInstance()) { - Optional - .ofNullable(i.getPid()) - .ifPresent(pid -> { - final Set pids = pid - .stream() - .filter(Objects::nonNull) - .filter(p -> StringUtils.isNotBlank(p.getValue())) - .collect(Collectors.toCollection(HashSet::new)); - - Optional - .ofNullable(i.getAlternateIdentifier()) - .ifPresent(altId -> { - final Set altIds = altId - .stream() - .filter(Objects::nonNull) - .filter(p -> StringUtils.isNotBlank(p.getValue())) - .collect(Collectors.toCollection(HashSet::new)); - - i.setAlternateIdentifier(Lists.newArrayList(Sets.difference(altIds, pids))); - }); - }); - - if (Objects.isNull(i.getAccessright()) || StringUtils.isBlank(i.getAccessright().getClassid())) { - i - .setAccessright( - accessRight( - ModelConstants.UNKNOWN, ModelConstants.NOT_AVAILABLE, - ModelConstants.DNET_ACCESS_MODES)); - } - if (Objects.isNull(i.getHostedby()) || StringUtils.isBlank(i.getHostedby().getKey())) { - i.setHostedby(ModelConstants.UNKNOWN_REPOSITORY); - } - if (Objects.isNull(i.getRefereed())) { - i.setRefereed(qualifier("0000", "Unknown", ModelConstants.DNET_REVIEW_LEVELS)); - } - } - } - if (Objects.isNull(r.getBestaccessright()) || StringUtils.isBlank(r.getBestaccessright().getClassid())) { - Qualifier bestaccessrights = OafMapperUtils.createBestAccessRights(r.getInstance()); - if (Objects.isNull(bestaccessrights)) { - r - .setBestaccessright( - qualifier( - ModelConstants.UNKNOWN, ModelConstants.NOT_AVAILABLE, - ModelConstants.DNET_ACCESS_MODES)); - } else { - r.setBestaccessright(bestaccessrights); - } - } - if (Objects.nonNull(r.getAuthor())) { - final List authors = Lists.newArrayList(); - for (Author a : r.getAuthor()) { - if (Objects.isNull(a.getPid())) { - a.setPid(Lists.newArrayList()); - } else { - a - .setPid( - a - .getPid() - .stream() - .filter(Objects::nonNull) - .filter(p -> Objects.nonNull(p.getQualifier())) - .filter(p -> StringUtils.isNotBlank(p.getValue())) - .map(p -> { - p.setValue(p.getValue().trim().replaceAll(ORCID_PREFIX_REGEX, "")); - return p; - }) - .filter(p -> StringUtils.isNotBlank(p.getValue())) - .collect( - Collectors - .toMap( - StructuredProperty::getValue, Function.identity(), (p1, p2) -> p1, - LinkedHashMap::new)) - .values() - .stream() - .collect(Collectors.toList())); - } - if (StringUtils.isBlank(a.getFullname())) { - if (StringUtils.isNotBlank(a.getName()) && StringUtils.isNotBlank(a.getSurname())) { - a.setFullname(a.getSurname() + ", " + a.getName()); - } - } - if (StringUtils.isNotBlank(a.getFullname()) && isValidAuthorName(a)) { - authors.add(a); - } - } - - boolean nullRank = authors - .stream() - .anyMatch(a -> Objects.isNull(a.getRank())); - if (nullRank) { - int i = 1; - for (Author author : authors) { - author.setRank(i++); - } - } - r.setAuthor(authors); - - } - if (value instanceof Publication) { - - } else if (value instanceof eu.dnetlib.dhp.schema.oaf.Dataset) { - - } else if (value instanceof OtherResearchProduct) { - - } else if (value instanceof Software) { - - } - } - - return value; - } - - private static boolean isValidAuthorName(Author a) { - return !Stream - .of(a.getFullname(), a.getName(), a.getSurname()) - .filter(s -> s != null && !s.isEmpty()) - .collect(Collectors.joining("")) - .toLowerCase() - .matches(INVALID_AUTHOR_REGEX); - } - - private static List processPidCleaning(List pids) { - return pids - .stream() - .filter(Objects::nonNull) - .filter(sp -> StringUtils.isNotBlank(StringUtils.trim(sp.getValue()))) - .filter(sp -> !PID_BLACKLIST.contains(sp.getValue().trim().toLowerCase())) - .filter(sp -> Objects.nonNull(sp.getQualifier())) - .filter(sp -> StringUtils.isNotBlank(sp.getQualifier().getClassid())) - .map(CleaningFunctions::normalizePidValue) - .filter(CleaningFunctions::pidFilter) - .collect(Collectors.toList()); - } - - protected static StructuredProperty cleanValue(StructuredProperty s) { - s.setValue(s.getValue().replaceAll(CLEANING_REGEX, " ")); - return s; - } - - protected static Field cleanValue(Field s) { - s.setValue(s.getValue().replaceAll(CLEANING_REGEX, " ")); - return s; - } - - // HELPERS - - private static void fixVocabName(Qualifier q, String vocabularyName) { - if (Objects.nonNull(q) && StringUtils.isBlank(q.getSchemeid())) { - q.setSchemeid(vocabularyName); - q.setSchemename(vocabularyName); - } - } - - private static AccessRight accessRight(String classid, String classname, String scheme) { - return OafMapperUtils - .accessRight( - classid, classname, scheme, scheme); - } - - private static Qualifier qualifier(String classid, String classname, String scheme) { - return OafMapperUtils - .qualifier( - classid, classname, scheme, scheme); - } - - /** - * Utility method that filter PID values on a per-type basis. - * @param s the PID whose value will be checked. - * @return false if the pid matches the filter criteria, true otherwise. - */ - public static boolean pidFilter(StructuredProperty s) { - final String pidValue = s.getValue(); - if (Objects.isNull(s.getQualifier()) || - StringUtils.isBlank(pidValue) || - StringUtils.isBlank(pidValue.replaceAll("(?:\\n|\\r|\\t|\\s)", ""))) { - return false; - } - if (CleaningFunctions.PID_BLACKLIST.contains(pidValue)) { - return false; - } - if (PidBlacklistProvider.getBlacklist(s.getQualifier().getClassid()).contains(pidValue)) { - return false; - } - return true; - } - - /** - * Utility method that normalises PID values on a per-type basis. - * @param pid the PID whose value will be normalised. - * @return the PID containing the normalised value. - */ - public static StructuredProperty normalizePidValue(StructuredProperty pid) { - String value = Optional - .ofNullable(pid.getValue()) - .map(String::trim) - .orElseThrow(() -> new IllegalArgumentException("PID value cannot be empty")); - switch (pid.getQualifier().getClassid()) { - - // TODO add cleaning for more PID types as needed - case "doi": - pid.setValue(value.toLowerCase().replaceFirst(DOI_PREFIX_REGEX, DOI_PREFIX)); - break; - } - return pid; - } - -} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/ModelHardLimits.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/ModelHardLimits.java deleted file mode 100644 index 147bd31b2..000000000 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/ModelHardLimits.java +++ /dev/null @@ -1,22 +0,0 @@ - -package eu.dnetlib.dhp.schema.oaf; - -public class ModelHardLimits { - - public static final String LAYOUT = "index"; - public static final String INTERPRETATION = "openaire"; - public static final String SEPARATOR = "-"; - - public static final int MAX_EXTERNAL_ENTITIES = 50; - public static final int MAX_AUTHORS = 200; - public static final int MAX_AUTHOR_FULLNAME_LENGTH = 1000; - public static final int MAX_TITLE_LENGTH = 5000; - public static final int MAX_TITLES = 10; - public static final int MAX_ABSTRACT_LENGTH = 150000; - public static final int MAX_INSTANCES = 10; - - public static String getCollectionName(String format) { - return format + SEPARATOR + LAYOUT + SEPARATOR + INTERPRETATION; - } - -} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/OafMapperUtils.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/OafMapperUtils.java deleted file mode 100644 index b0e8a65d2..000000000 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/OafMapperUtils.java +++ /dev/null @@ -1,364 +0,0 @@ - -package eu.dnetlib.dhp.schema.oaf; - -import static eu.dnetlib.dhp.schema.common.ModelConstants.*; - -import java.util.*; -import java.util.concurrent.ConcurrentHashMap; -import java.util.function.Function; -import java.util.function.Predicate; -import java.util.stream.Collectors; - -import org.apache.commons.lang3.StringUtils; - -import eu.dnetlib.dhp.schema.common.AccessRightComparator; -import eu.dnetlib.dhp.schema.common.ModelSupport; -import eu.dnetlib.dhp.utils.DHPUtils; - -public class OafMapperUtils { - - public static Oaf merge(final Oaf left, final Oaf right) { - if (ModelSupport.isSubClass(left, OafEntity.class)) { - return mergeEntities((OafEntity) left, (OafEntity) right); - } else if (ModelSupport.isSubClass(left, Relation.class)) { - ((Relation) left).mergeFrom((Relation) right); - } else { - throw new RuntimeException("invalid Oaf type:" + left.getClass().getCanonicalName()); - } - return left; - } - - public static OafEntity mergeEntities(OafEntity left, OafEntity right) { - if (ModelSupport.isSubClass(left, Result.class)) { - return mergeResults((Result) left, (Result) right); - } else if (ModelSupport.isSubClass(left, Datasource.class)) { - ((Datasource) left).mergeFrom((Datasource) right); - } else if (ModelSupport.isSubClass(left, Organization.class)) { - ((Organization) left).mergeFrom((Organization) right); - } else if (ModelSupport.isSubClass(left, Project.class)) { - ((Project) left).mergeFrom((Project) right); - } else { - throw new RuntimeException("invalid OafEntity subtype:" + left.getClass().getCanonicalName()); - } - return left; - } - - public static Result mergeResults(Result left, Result right) { - if (new ResultTypeComparator().compare(left, right) < 0) { - left.mergeFrom(right); - return left; - } else { - right.mergeFrom(left); - return right; - } - } - - public static KeyValue keyValue(final String k, final String v) { - final KeyValue kv = new KeyValue(); - kv.setKey(k); - kv.setValue(v); - return kv; - } - - public static List listKeyValues(final String... s) { - if (s.length % 2 > 0) { - throw new RuntimeException("Invalid number of parameters (k,v,k,v,....)"); - } - - final List list = new ArrayList<>(); - for (int i = 0; i < s.length; i += 2) { - list.add(keyValue(s[i], s[i + 1])); - } - return list; - } - - public static Field field(final T value, final DataInfo info) { - if (value == null || StringUtils.isBlank(value.toString())) { - return null; - } - - final Field field = new Field<>(); - field.setValue(value); - field.setDataInfo(info); - return field; - } - - public static List> listFields(final DataInfo info, final String... values) { - return Arrays - .stream(values) - .map(v -> field(v, info)) - .filter(Objects::nonNull) - .filter(distinctByKey(f -> f.getValue())) - .collect(Collectors.toList()); - } - - public static List> listFields(final DataInfo info, final List values) { - return values - .stream() - .map(v -> field(v, info)) - .filter(Objects::nonNull) - .filter(distinctByKey(f -> f.getValue())) - .collect(Collectors.toList()); - } - - public static Qualifier unknown(final String schemeid, final String schemename) { - return qualifier("UNKNOWN", "Unknown", schemeid, schemename); - } - - public static AccessRight accessRight( - final String classid, - final String classname, - final String schemeid, - final String schemename) { - return accessRight(classid, classname, schemeid, schemename, null); - } - - public static AccessRight accessRight( - final String classid, - final String classname, - final String schemeid, - final String schemename, - final OpenAccessRoute openAccessRoute) { - final AccessRight accessRight = new AccessRight(); - accessRight.setClassid(classid); - accessRight.setClassname(classname); - accessRight.setSchemeid(schemeid); - accessRight.setSchemename(schemename); - accessRight.setOpenAccessRoute(openAccessRoute); - return accessRight; - } - - public static Qualifier qualifier( - final String classid, - final String classname, - final String schemeid, - final String schemename) { - final Qualifier q = new Qualifier(); - q.setClassid(classid); - q.setClassname(classname); - q.setSchemeid(schemeid); - q.setSchemename(schemename); - return q; - } - - public static Qualifier qualifier(final Qualifier qualifier) { - final Qualifier q = new Qualifier(); - q.setClassid(qualifier.getClassid()); - q.setClassname(qualifier.getClassname()); - q.setSchemeid(qualifier.getSchemeid()); - q.setSchemename(qualifier.getSchemename()); - return q; - } - - public static StructuredProperty structuredProperty( - final String value, - final String classid, - final String classname, - final String schemeid, - final String schemename, - final DataInfo dataInfo) { - - return structuredProperty(value, qualifier(classid, classname, schemeid, schemename), dataInfo); - } - - public static StructuredProperty structuredProperty( - final String value, - final Qualifier qualifier, - final DataInfo dataInfo) { - if (value == null) { - return null; - } - final StructuredProperty sp = new StructuredProperty(); - sp.setValue(value); - sp.setQualifier(qualifier); - sp.setDataInfo(dataInfo); - return sp; - } - - public static ExtraInfo extraInfo( - final String name, - final String value, - final String typology, - final String provenance, - final String trust) { - final ExtraInfo info = new ExtraInfo(); - info.setName(name); - info.setValue(value); - info.setTypology(typology); - info.setProvenance(provenance); - info.setTrust(trust); - return info; - } - - public static OAIProvenance oaiIProvenance( - final String identifier, - final String baseURL, - final String metadataNamespace, - final Boolean altered, - final String datestamp, - final String harvestDate) { - - final OriginDescription desc = new OriginDescription(); - desc.setIdentifier(identifier); - desc.setBaseURL(baseURL); - desc.setMetadataNamespace(metadataNamespace); - desc.setAltered(altered); - desc.setDatestamp(datestamp); - desc.setHarvestDate(harvestDate); - - final OAIProvenance p = new OAIProvenance(); - p.setOriginDescription(desc); - - return p; - } - - public static Journal journal( - final String name, - final String issnPrinted, - final String issnOnline, - final String issnLinking, - final DataInfo dataInfo) { - return journal( - name, - issnPrinted, - issnOnline, - issnLinking, - null, - null, - null, - null, - null, - null, - null, - dataInfo); - } - - public static Journal journal( - final String name, - final String issnPrinted, - final String issnOnline, - final String issnLinking, - final String ep, - final String iss, - final String sp, - final String vol, - final String edition, - final String conferenceplace, - final String conferencedate, - final DataInfo dataInfo) { - - if (StringUtils.isNotBlank(name) - || StringUtils.isNotBlank(issnPrinted) - || StringUtils.isNotBlank(issnOnline) - || StringUtils.isNotBlank(issnLinking)) { - final Journal j = new Journal(); - j.setName(name); - j.setIssnPrinted(issnPrinted); - j.setIssnOnline(issnOnline); - j.setIssnLinking(issnLinking); - j.setEp(ep); - j.setIss(iss); - j.setSp(sp); - j.setVol(vol); - j.setEdition(edition); - j.setConferenceplace(conferenceplace); - j.setConferencedate(conferencedate); - j.setDataInfo(dataInfo); - return j; - } else { - return null; - } - } - - public static DataInfo dataInfo( - final Boolean deletedbyinference, - final String inferenceprovenance, - final Boolean inferred, - final Boolean invisible, - final Qualifier provenanceaction, - final String trust) { - final DataInfo d = new DataInfo(); - d.setDeletedbyinference(deletedbyinference); - d.setInferenceprovenance(inferenceprovenance); - d.setInferred(inferred); - d.setInvisible(invisible); - d.setProvenanceaction(provenanceaction); - d.setTrust(trust); - return d; - } - - public static String createOpenaireId( - final int prefix, - final String originalId, - final boolean to_md5) { - if (StringUtils.isBlank(originalId)) { - return null; - } else if (to_md5) { - final String nsPrefix = StringUtils.substringBefore(originalId, "::"); - final String rest = StringUtils.substringAfter(originalId, "::"); - return String.format("%s|%s::%s", prefix, nsPrefix, DHPUtils.md5(rest)); - } else { - return String.format("%s|%s", prefix, originalId); - } - } - - public static String createOpenaireId( - final String type, - final String originalId, - final boolean to_md5) { - switch (type) { - case "datasource": - return createOpenaireId(10, originalId, to_md5); - case "organization": - return createOpenaireId(20, originalId, to_md5); - case "person": - return createOpenaireId(30, originalId, to_md5); - case "project": - return createOpenaireId(40, originalId, to_md5); - default: - return createOpenaireId(50, originalId, to_md5); - } - } - - public static String asString(final Object o) { - return o == null ? "" : o.toString(); - } - - public static Predicate distinctByKey( - final Function keyExtractor) { - final Map seen = new ConcurrentHashMap<>(); - return t -> seen.putIfAbsent(keyExtractor.apply(t), Boolean.TRUE) == null; - } - - public static Qualifier createBestAccessRights(final List instanceList) { - return getBestAccessRights(instanceList); - } - - protected static Qualifier getBestAccessRights(final List instanceList) { - if (instanceList != null) { - final Optional min = instanceList - .stream() - .map(i -> i.getAccessright()) - .min(new AccessRightComparator<>()); - - final Qualifier rights = min.isPresent() ? qualifier(min.get()) : new Qualifier(); - - if (StringUtils.isBlank(rights.getClassid())) { - rights.setClassid(UNKNOWN); - } - if (StringUtils.isBlank(rights.getClassname()) - || UNKNOWN.equalsIgnoreCase(rights.getClassname())) { - rights.setClassname(NOT_AVAILABLE); - } - if (StringUtils.isBlank(rights.getSchemeid())) { - rights.setSchemeid(DNET_ACCESS_MODES); - } - if (StringUtils.isBlank(rights.getSchemename())) { - rights.setSchemename(DNET_ACCESS_MODES); - } - - return rights; - } - return null; - } -} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/ResultTypeComparator.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/ResultTypeComparator.java deleted file mode 100644 index 089d71a0c..000000000 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/ResultTypeComparator.java +++ /dev/null @@ -1,78 +0,0 @@ - -package eu.dnetlib.dhp.schema.oaf; - -import static eu.dnetlib.dhp.schema.common.ModelConstants.CROSSREF_ID; - -import java.util.Comparator; -import java.util.HashSet; -import java.util.Optional; -import java.util.stream.Collectors; -import java.util.stream.Stream; - -import com.google.common.collect.Sets; - -import eu.dnetlib.dhp.schema.common.ModelConstants; - -public class ResultTypeComparator implements Comparator { - - @Override - public int compare(Result left, Result right) { - - if (left == null && right == null) - return 0; - if (left == null) - return 1; - if (right == null) - return -1; - - HashSet lCf = getCollectedFromIds(left); - HashSet rCf = getCollectedFromIds(right); - - if (lCf.contains(CROSSREF_ID) && !rCf.contains(CROSSREF_ID)) { - return -1; - } - if (!lCf.contains(CROSSREF_ID) && rCf.contains(CROSSREF_ID)) { - return 1; - } - - String lClass = left.getResulttype().getClassid(); - String rClass = right.getResulttype().getClassid(); - - if (lClass.equals(rClass)) - return 0; - - if (lClass.equals(ModelConstants.PUBLICATION_RESULTTYPE_CLASSID)) - return -1; - if (rClass.equals(ModelConstants.PUBLICATION_RESULTTYPE_CLASSID)) - return 1; - - if (lClass.equals(ModelConstants.DATASET_RESULTTYPE_CLASSID)) - return -1; - if (rClass.equals(ModelConstants.DATASET_RESULTTYPE_CLASSID)) - return 1; - - if (lClass.equals(ModelConstants.SOFTWARE_RESULTTYPE_CLASSID)) - return -1; - if (rClass.equals(ModelConstants.SOFTWARE_RESULTTYPE_CLASSID)) - return 1; - - if (lClass.equals(ModelConstants.ORP_RESULTTYPE_CLASSID)) - return -1; - if (rClass.equals(ModelConstants.ORP_RESULTTYPE_CLASSID)) - return 1; - - // Else (but unlikely), lexicographical ordering will do. - return lClass.compareTo(rClass); - } - - protected HashSet getCollectedFromIds(Result left) { - return Optional - .ofNullable(left.getCollectedfrom()) - .map( - cf -> cf - .stream() - .map(c -> c.getKey()) - .collect(Collectors.toCollection(HashSet::new))) - .orElse(new HashSet<>()); - } -} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactory.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactory.java deleted file mode 100644 index 6dabfc577..000000000 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactory.java +++ /dev/null @@ -1,212 +0,0 @@ - -package eu.dnetlib.dhp.schema.oaf.utils; - -import static com.google.common.base.Preconditions.checkArgument; -import static eu.dnetlib.dhp.schema.common.ModelConstants.*; - -import java.io.Serializable; -import java.util.*; -import java.util.function.Function; -import java.util.stream.Collectors; -import java.util.stream.Stream; - -import org.apache.commons.lang3.StringUtils; - -import com.google.common.collect.HashBiMap; -import com.google.common.collect.Maps; - -import eu.dnetlib.dhp.schema.common.ModelSupport; -import eu.dnetlib.dhp.schema.oaf.*; -import eu.dnetlib.dhp.utils.DHPUtils; - -/** - * Factory class for OpenAIRE identifiers in the Graph - */ -public class IdentifierFactory implements Serializable { - - public static final String ID_SEPARATOR = "::"; - public static final String ID_PREFIX_SEPARATOR = "|"; - - public static final int ID_PREFIX_LEN = 12; - - /** - * Declares the associations PID_TYPE -> [DATASOURCE ID, NAME] considered authoritative for that PID_TYPE - */ - public static final Map> PID_AUTHORITY = Maps.newHashMap(); - - static { - PID_AUTHORITY.put(PidType.doi, HashBiMap.create()); - PID_AUTHORITY.get(PidType.doi).put(CROSSREF_ID, "Crossref"); - PID_AUTHORITY.get(PidType.doi).put(DATACITE_ID, "Datacite"); - - PID_AUTHORITY.put(PidType.pmc, HashBiMap.create()); - PID_AUTHORITY.get(PidType.pmc).put(EUROPE_PUBMED_CENTRAL_ID, "Europe PubMed Central"); - PID_AUTHORITY.get(PidType.pmc).put(PUBMED_CENTRAL_ID, "PubMed Central"); - - PID_AUTHORITY.put(PidType.pmid, HashBiMap.create()); - PID_AUTHORITY.get(PidType.pmid).put(EUROPE_PUBMED_CENTRAL_ID, "Europe PubMed Central"); - PID_AUTHORITY.get(PidType.pmid).put(PUBMED_CENTRAL_ID, "PubMed Central"); - - PID_AUTHORITY.put(PidType.arXiv, HashBiMap.create()); - PID_AUTHORITY.get(PidType.arXiv).put(ARXIV_ID, "arXiv.org e-Print Archive"); - } - - public static List getPids(List pid, KeyValue collectedFrom) { - return pidFromInstance(pid, collectedFrom, true).distinct().collect(Collectors.toList()); - } - - public static String createDOIBoostIdentifier(T entity) { - if (entity == null) - return null; - - StructuredProperty pid = null; - if (entity.getPid() != null) { - pid = entity - .getPid() - .stream() - .filter(Objects::nonNull) - .filter(s -> s.getQualifier() != null && "doi".equalsIgnoreCase(s.getQualifier().getClassid())) - .filter(CleaningFunctions::pidFilter) - .findAny() - .orElse(null); - } else { - if (entity.getInstance() != null) { - pid = entity - .getInstance() - .stream() - .filter(i -> i.getPid() != null) - .flatMap(i -> i.getPid().stream()) - .filter(CleaningFunctions::pidFilter) - .findAny() - .orElse(null); - } - } - if (pid != null) - return idFromPid(entity, pid, true); - return null; - } - - /** - * Creates an identifier from the most relevant PID (if available) provided by a known PID authority in the given - * entity T. Returns entity.id when none of the PIDs meet the selection criteria is available. - * - * @param entity the entity providing PIDs and a default ID. - * @param the specific entity type. Currently Organization and Result subclasses are supported. - * @param md5 indicates whether should hash the PID value or not. - * @return an identifier from the most relevant PID, entity.id otherwise - */ - public static String createIdentifier(T entity, boolean md5) { - - checkArgument(StringUtils.isNoneBlank(entity.getId()), "missing entity identifier"); - - final Map> pids = extractPids(entity); - - return pids - .values() - .stream() - .flatMap(s -> s.stream()) - .min(new PidComparator<>(entity)) - .map( - min -> Optional - .ofNullable(pids.get(min.getQualifier().getClassid())) - .map( - p -> p - .stream() - .sorted(new PidValueComparator()) - .findFirst() - .map(s -> idFromPid(entity, s, md5)) - .orElseGet(entity::getId)) - .orElseGet(entity::getId)) - .orElseGet(entity::getId); - } - - private static Map> extractPids(T entity) { - if (entity instanceof Result) { - return Optional - .ofNullable(((Result) entity).getInstance()) - .map( - instance -> mapPids(instance)) - .orElse(new HashMap<>()); - } else { - return entity - .getPid() - .stream() - .map(CleaningFunctions::normalizePidValue) - .filter(CleaningFunctions::pidFilter) - .collect( - Collectors - .groupingBy( - p -> p.getQualifier().getClassid(), - Collectors.mapping(p -> p, Collectors.toCollection(HashSet::new)))); - } - } - - private static Map> mapPids(List instance) { - return instance - .stream() - .map(i -> pidFromInstance(i.getPid(), i.getCollectedfrom(), false)) - .flatMap(Function.identity()) - .collect( - Collectors - .groupingBy( - p -> p.getQualifier().getClassid(), - Collectors.mapping(p -> p, Collectors.toCollection(HashSet::new)))); - } - - private static Stream pidFromInstance(List pid, KeyValue collectedFrom, - boolean mapHandles) { - return Optional - .ofNullable(pid) - .map( - pp -> pp - .stream() - // filter away PIDs provided by a DS that is not considered an authority for the - // given PID Type - .filter(p -> { - return shouldFilterPid(collectedFrom, p, mapHandles); - }) - .map(CleaningFunctions::normalizePidValue) - .filter(CleaningFunctions::pidFilter)) - .orElse(Stream.empty()); - } - - private static boolean shouldFilterPid(KeyValue collectedFrom, StructuredProperty p, boolean mapHandles) { - final PidType pType = PidType.tryValueOf(p.getQualifier().getClassid()); - return (mapHandles && pType.equals(PidType.handle)) || Optional.ofNullable(collectedFrom).isPresent() && - Optional - .ofNullable(PID_AUTHORITY.get(pType)) - .map(authorities -> { - return authorities.containsKey(collectedFrom.getKey()) - || authorities.containsValue(collectedFrom.getValue()); - }) - .orElse(false); - } - - /** - * @see {@link IdentifierFactory#createIdentifier(OafEntity, boolean)} - */ - public static String createIdentifier(T entity) { - - return createIdentifier(entity, true); - } - - private static String idFromPid(T entity, StructuredProperty s, boolean md5) { - return new StringBuilder() - .append(ModelSupport.getIdPrefix(entity.getClass())) - .append(ID_PREFIX_SEPARATOR) - .append(createPrefix(s.getQualifier().getClassid())) - .append(ID_SEPARATOR) - .append(md5 ? DHPUtils.md5(s.getValue()) : s.getValue()) - .toString(); - } - - // create the prefix (length = 12) - private static String createPrefix(String pidType) { - StringBuilder prefix = new StringBuilder(StringUtils.left(pidType, ID_PREFIX_LEN)); - while (prefix.length() < ID_PREFIX_LEN) { - prefix.append("_"); - } - return prefix.substring(0, ID_PREFIX_LEN); - } - -} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OrganizationPidComparator.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OrganizationPidComparator.java deleted file mode 100644 index 3a6df2924..000000000 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OrganizationPidComparator.java +++ /dev/null @@ -1,38 +0,0 @@ - -package eu.dnetlib.dhp.schema.oaf.utils; - -import java.util.Comparator; - -import eu.dnetlib.dhp.schema.oaf.StructuredProperty; - -public class OrganizationPidComparator implements Comparator { - - @Override - public int compare(StructuredProperty left, StructuredProperty right) { - - PidType lClass = PidType.tryValueOf(left.getQualifier().getClassid()); - PidType rClass = PidType.tryValueOf(right.getQualifier().getClassid()); - - if (lClass.equals(PidType.openorgs)) - return -1; - if (rClass.equals(PidType.openorgs)) - return 1; - - if (lClass.equals(PidType.GRID)) - return -1; - if (rClass.equals(PidType.GRID)) - return 1; - - if (lClass.equals(PidType.mag_id)) - return -1; - if (rClass.equals(PidType.mag_id)) - return 1; - - if (lClass.equals(PidType.urn)) - return -1; - if (rClass.equals(PidType.urn)) - return 1; - - return 0; - } -} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/PidBlacklist.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/PidBlacklist.java deleted file mode 100644 index 0b8e5e3f1..000000000 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/PidBlacklist.java +++ /dev/null @@ -1,8 +0,0 @@ - -package eu.dnetlib.dhp.schema.oaf.utils; - -import java.util.HashMap; -import java.util.HashSet; - -public class PidBlacklist extends HashMap> { -} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/PidBlacklistProvider.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/PidBlacklistProvider.java deleted file mode 100644 index 1c1c21f92..000000000 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/PidBlacklistProvider.java +++ /dev/null @@ -1,37 +0,0 @@ - -package eu.dnetlib.dhp.schema.oaf.utils; - -import java.io.IOException; -import java.util.HashSet; -import java.util.Optional; -import java.util.Set; - -import org.apache.commons.io.IOUtils; - -import com.fasterxml.jackson.databind.ObjectMapper; - -public class PidBlacklistProvider { - - private static final PidBlacklist blacklist; - - static { - try { - String json = IOUtils.toString(IdentifierFactory.class.getResourceAsStream("pid_blacklist.json")); - blacklist = new ObjectMapper().readValue(json, PidBlacklist.class); - - } catch (IOException e) { - throw new RuntimeException(e); - } - } - - public static PidBlacklist getBlacklist() { - return blacklist; - } - - public static Set getBlacklist(String pidType) { - return Optional - .ofNullable(getBlacklist().get(pidType)) - .orElse(new HashSet<>()); - } - -} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/PidComparator.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/PidComparator.java deleted file mode 100644 index 2bee0eb56..000000000 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/PidComparator.java +++ /dev/null @@ -1,48 +0,0 @@ - -package eu.dnetlib.dhp.schema.oaf.utils; - -import java.util.Comparator; - -import eu.dnetlib.dhp.schema.common.ModelSupport; -import eu.dnetlib.dhp.schema.oaf.OafEntity; -import eu.dnetlib.dhp.schema.oaf.Organization; -import eu.dnetlib.dhp.schema.oaf.Result; -import eu.dnetlib.dhp.schema.oaf.StructuredProperty; - -public class PidComparator implements Comparator { - - private T entity; - - public PidComparator(T entity) { - this.entity = entity; - } - - @Override - public int compare(StructuredProperty left, StructuredProperty right) { - - if (left == null && right == null) - return 0; - if (left == null) - return 1; - if (right == null) - return -1; - - if (ModelSupport.isSubClass(entity, Result.class)) { - return compareResultPids(left, right); - } - if (ModelSupport.isSubClass(entity, Organization.class)) { - return compareOrganizationtPids(left, right); - } - - // Else (but unlikely), lexicographical ordering will do. - return left.getQualifier().getClassid().compareTo(right.getQualifier().getClassid()); - } - - private int compareResultPids(StructuredProperty left, StructuredProperty right) { - return new ResultPidComparator().compare(left, right); - } - - private int compareOrganizationtPids(StructuredProperty left, StructuredProperty right) { - return new OrganizationPidComparator().compare(left, right); - } -} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/PidType.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/PidType.java deleted file mode 100644 index 5a297be5e..000000000 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/PidType.java +++ /dev/null @@ -1,29 +0,0 @@ - -package eu.dnetlib.dhp.schema.oaf.utils; - -import org.apache.commons.lang3.EnumUtils; - -public enum PidType { - - // Result - doi, pmid, pmc, handle, arXiv, nct, pdb, - - // Organization - openorgs, corda, corda_h2020, GRID, mag_id, urn, - - // Used by dedup - undefined, original; - - public static boolean isValid(String type) { - return EnumUtils.isValidEnum(PidType.class, type); - } - - public static PidType tryValueOf(String s) { - try { - return PidType.valueOf(s); - } catch (Exception e) { - return PidType.original; - } - } - -} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/PidValueComparator.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/PidValueComparator.java deleted file mode 100644 index 7e53ba9b7..000000000 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/PidValueComparator.java +++ /dev/null @@ -1,33 +0,0 @@ - -package eu.dnetlib.dhp.schema.oaf.utils; - -import java.util.Comparator; -import java.util.Optional; - -import eu.dnetlib.dhp.schema.oaf.*; - -public class PidValueComparator implements Comparator { - - @Override - public int compare(StructuredProperty left, StructuredProperty right) { - - if (left == null && right == null) - return 0; - if (left == null) - return 1; - if (right == null) - return -1; - - StructuredProperty l = CleaningFunctions.normalizePidValue(left); - StructuredProperty r = CleaningFunctions.normalizePidValue(right); - - return Optional - .ofNullable(l.getValue()) - .map( - lv -> Optional - .ofNullable(r.getValue()) - .map(rv -> lv.compareTo(rv)) - .orElse(-1)) - .orElse(1); - } -} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/ResultPidComparator.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/ResultPidComparator.java deleted file mode 100644 index e51c4801f..000000000 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/ResultPidComparator.java +++ /dev/null @@ -1,53 +0,0 @@ - -package eu.dnetlib.dhp.schema.oaf.utils; - -import java.util.Comparator; - -import eu.dnetlib.dhp.schema.oaf.StructuredProperty; - -public class ResultPidComparator implements Comparator { - - @Override - public int compare(StructuredProperty left, StructuredProperty right) { - - PidType lClass = PidType.tryValueOf(left.getQualifier().getClassid()); - PidType rClass = PidType.tryValueOf(right.getQualifier().getClassid()); - - if (lClass.equals(PidType.doi)) - return -1; - if (rClass.equals(PidType.doi)) - return 1; - - if (lClass.equals(PidType.pmid)) - return -1; - if (rClass.equals(PidType.pmid)) - return 1; - - if (lClass.equals(PidType.pmc)) - return -1; - if (rClass.equals(PidType.pmc)) - return 1; - - if (lClass.equals(PidType.handle)) - return -1; - if (rClass.equals(PidType.handle)) - return 1; - - if (lClass.equals(PidType.arXiv)) - return -1; - if (rClass.equals(PidType.arXiv)) - return 1; - - if (lClass.equals(PidType.nct)) - return -1; - if (rClass.equals(PidType.nct)) - return 1; - - if (lClass.equals(PidType.pdb)) - return -1; - if (rClass.equals(PidType.pdb)) - return 1; - - return 0; - } -} diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/OafMapperUtilsTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/OafMapperUtilsTest.java deleted file mode 100644 index 93840d534..000000000 --- a/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/OafMapperUtilsTest.java +++ /dev/null @@ -1,69 +0,0 @@ - -package eu.dnetlib.dhp.schema.oaf; - -import static org.junit.jupiter.api.Assertions.*; - -import java.io.IOException; -import java.util.HashSet; -import java.util.List; -import java.util.stream.Collectors; - -import org.apache.commons.io.IOUtils; -import org.jetbrains.annotations.NotNull; -import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.Test; - -import com.fasterxml.jackson.databind.DeserializationFeature; -import com.fasterxml.jackson.databind.ObjectMapper; - -import eu.dnetlib.dhp.schema.common.ModelConstants; -import it.unimi.dsi.fastutil.Hash; - -public class OafMapperUtilsTest { - - private static ObjectMapper OBJECT_MAPPER = new ObjectMapper() - .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); - - @Test - public void testMergePubs() throws IOException { - Publication p1 = read("publication_1.json", Publication.class); - Publication p2 = read("publication_2.json", Publication.class); - Dataset d1 = read("dataset_1.json", Dataset.class); - Dataset d2 = read("dataset_2.json", Dataset.class); - - assertEquals(p1.getCollectedfrom().size(), 1); - assertEquals(p1.getCollectedfrom().get(0).getKey(), ModelConstants.CROSSREF_ID); - assertEquals(d2.getCollectedfrom().size(), 1); - assertFalse(cfId(d2.getCollectedfrom()).contains(ModelConstants.CROSSREF_ID)); - - assertTrue( - OafMapperUtils - .mergeResults(p1, d2) - .getResulttype() - .getClassid() - .equals(ModelConstants.PUBLICATION_RESULTTYPE_CLASSID)); - - assertEquals(p2.getCollectedfrom().size(), 1); - assertFalse(cfId(p2.getCollectedfrom()).contains(ModelConstants.CROSSREF_ID)); - assertEquals(d1.getCollectedfrom().size(), 1); - assertTrue(cfId(d1.getCollectedfrom()).contains(ModelConstants.CROSSREF_ID)); - - assertTrue( - OafMapperUtils - .mergeResults(p2, d1) - .getResulttype() - .getClassid() - .equals(ModelConstants.DATASET_RESULTTYPE_CLASSID)); - } - - @NotNull - protected HashSet cfId(List collectedfrom) { - return collectedfrom.stream().map(c -> c.getKey()).collect(Collectors.toCollection(HashSet::new)); - } - - protected T read(String filename, Class clazz) throws IOException { - final String json = IOUtils.toString(getClass().getResourceAsStream(filename)); - return OBJECT_MAPPER.readValue(json, clazz); - } - -} diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/BlackListProviderTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/BlackListProviderTest.java deleted file mode 100644 index 203cda0ca..000000000 --- a/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/BlackListProviderTest.java +++ /dev/null @@ -1,21 +0,0 @@ - -package eu.dnetlib.dhp.schema.oaf.utils; - -import java.util.Set; - -import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.Test; - -public class BlackListProviderTest { - - @Test - public void blackListTest() { - - Assertions.assertNotNull(PidBlacklistProvider.getBlacklist()); - Assertions.assertNotNull(PidBlacklistProvider.getBlacklist().get("doi")); - Assertions.assertTrue(PidBlacklistProvider.getBlacklist().get("doi").size() > 0); - final Set xxx = PidBlacklistProvider.getBlacklist("xxx"); - Assertions.assertNotNull(xxx); - Assertions.assertEquals(0, xxx.size()); - } -} diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactoryTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactoryTest.java deleted file mode 100644 index 935b74b08..000000000 --- a/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactoryTest.java +++ /dev/null @@ -1,75 +0,0 @@ - -package eu.dnetlib.dhp.schema.oaf.utils; - -import static org.junit.jupiter.api.Assertions.*; - -import java.io.IOException; - -import org.apache.commons.io.IOUtils; -import org.junit.jupiter.api.Test; - -import com.fasterxml.jackson.databind.DeserializationFeature; -import com.fasterxml.jackson.databind.ObjectMapper; - -import eu.dnetlib.dhp.schema.oaf.Publication; -import eu.dnetlib.dhp.utils.DHPUtils; - -public class IdentifierFactoryTest { - - private static ObjectMapper OBJECT_MAPPER = new ObjectMapper() - .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); - - @Test - public void testCreateIdentifierForPublication() throws IOException { - - verifyIdentifier( - "publication_doi1.json", "50|doi_________::79dbc7a2a56dc1532659f9038843256e", true); - - verifyIdentifier( - "publication_doi2.json", "50|doi_________::79dbc7a2a56dc1532659f9038843256e", true); - - verifyIdentifier( - "publication_doi3.json", "50|pmc_________::94e4cb08c93f8733b48e2445d04002ac", true); - - verifyIdentifier( - "publication_doi4.json", "50|od______2852::38861c44e6052a8d49f59a4c39ba5e66", true); - - verifyIdentifier( - "publication_pmc1.json", "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f", true); - - verifyIdentifier( - "publication_pmc2.json", "50|pmc_________::94e4cb08c93f8733b48e2445d04002ac", true); - - final String defaultID = "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f"; - verifyIdentifier("publication_3.json", defaultID, true); - verifyIdentifier("publication_4.json", defaultID, true); - verifyIdentifier("publication_5.json", defaultID, true); - - } - - @Test - public void testCreateIdentifierForPublicationNoHash() throws IOException { - - verifyIdentifier("publication_doi1.json", "50|doi_________::10.1016/j.cmet.2010.03.013", false); - verifyIdentifier("publication_doi2.json", "50|doi_________::10.1016/j.cmet.2010.03.013", false); - verifyIdentifier("publication_pmc1.json", "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f", false); - verifyIdentifier( - "publication_urn1.json", "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f", false); - - final String defaultID = "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f"; - verifyIdentifier("publication_3.json", defaultID, false); - verifyIdentifier("publication_4.json", defaultID, false); - verifyIdentifier("publication_5.json", defaultID, false); - } - - protected void verifyIdentifier(String filename, String expectedID, boolean md5) throws IOException { - final String json = IOUtils.toString(getClass().getResourceAsStream(filename)); - final Publication pub = OBJECT_MAPPER.readValue(json, Publication.class); - - String id = IdentifierFactory.createIdentifier(pub, md5); - - assertNotNull(id); - assertEquals(expectedID, id); - } - -} diff --git a/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/dataset_1.json b/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/dataset_1.json deleted file mode 100644 index e38c4d1cc..000000000 --- a/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/dataset_1.json +++ /dev/null @@ -1 +0,0 @@ -{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1g", "resuttype" : { "classid" : "dataset" }, "pid":[{"qualifier":{"classid":"doi"},"value":"10.1016/j.cmet.2011.03.013"},{"qualifier":{"classid":"urn"},"value":"urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"},{"qualifier":{"classid":"scp-number"},"value":"79953761260"},{"qualifier":{"classid":"pmc"},"value":"21459329"}], "collectedfrom" : [ { "key" : "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2", "value" : "Crossref"} ]} \ No newline at end of file diff --git a/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/dataset_2.json b/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/dataset_2.json deleted file mode 100644 index 52e4e126a..000000000 --- a/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/dataset_2.json +++ /dev/null @@ -1 +0,0 @@ -{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1g", "resuttype" : { "classid" : "dataset" }, "pid":[{"qualifier":{"classid":"doi"},"value":"10.1016/j.cmet.2011.03.013"},{"qualifier":{"classid":"urn"},"value":"urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"},{"qualifier":{"classid":"scp-number"},"value":"79953761260"},{"qualifier":{"classid":"pmc"},"value":"21459329"}], "collectedfrom" : [ { "key" : "10|openaire____::081b82f96300b6a6e3d282bad31cb6e3", "value" : "Repository B"} ]} \ No newline at end of file diff --git a/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/publication_1.json b/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/publication_1.json deleted file mode 100644 index 704c5ad4d..000000000 --- a/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/publication_1.json +++ /dev/null @@ -1 +0,0 @@ -{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f", "resuttype" : { "classid" : "publication" }, "pid":[{"qualifier":{"classid":"doi"},"value":"10.1016/j.cmet.2011.03.013"},{"qualifier":{"classid":"urn"},"value":"urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"},{"qualifier":{"classid":"scp-number"},"value":"79953761260"},{"qualifier":{"classid":"pmc"},"value":"21459329"}], "collectedfrom" : [ { "key" : "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2", "value" : "Crossref"} ]} \ No newline at end of file diff --git a/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/publication_2.json b/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/publication_2.json deleted file mode 100644 index a1744e84e..000000000 --- a/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/publication_2.json +++ /dev/null @@ -1 +0,0 @@ -{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f", "resuttype" : { "classid" : "publication" }, "pid":[{"qualifier":{"classid":"doi"},"value":"10.1016/j.cmet.2011.03.013"},{"qualifier":{"classid":"urn"},"value":"urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"},{"qualifier":{"classid":"scp-number"},"value":"79953761260"},{"qualifier":{"classid":"pmc"},"value":"21459329"}], "collectedfrom" : [ { "key" : "10|openaire____::081b82f96300b6a6e3d282bad31cb6e3", "value" : "Repository A"} ]} \ No newline at end of file diff --git a/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_3.json b/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_3.json deleted file mode 100644 index 6d33568f4..000000000 --- a/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_3.json +++ /dev/null @@ -1 +0,0 @@ -{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f","pid":[{"qualifier":{"classid":"scp-number"},"value":"79953761260"}]} \ No newline at end of file diff --git a/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_4.json b/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_4.json deleted file mode 100644 index 6617fe15f..000000000 --- a/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_4.json +++ /dev/null @@ -1 +0,0 @@ -{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f","pid":[]} \ No newline at end of file diff --git a/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_5.json b/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_5.json deleted file mode 100644 index 700a10046..000000000 --- a/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_5.json +++ /dev/null @@ -1 +0,0 @@ -{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f"} \ No newline at end of file diff --git a/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_doi1.json b/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_doi1.json deleted file mode 100644 index 83bc0cd20..000000000 --- a/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_doi1.json +++ /dev/null @@ -1,33 +0,0 @@ -{ - "id": "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f", - "instance": [ - { - "collectedfrom": { - "key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2", - "value": "Crossref" - }, - "pid": [ - { - "qualifier": {"classid": "doi"}, - "value": "10.1016/j.cmet.2010.03.013" - } - ] - }, - { - "pid": [ - { - "qualifier": {"classid": "urn"}, - "value": "urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2" - }, - { - "qualifier": {"classid": "scp-number"}, - "value": "79953761260" - }, - { - "qualifier": {"classid": "pmc"}, - "value": "21459329" - } - ] - } - ] -} \ No newline at end of file diff --git a/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_doi2.json b/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_doi2.json deleted file mode 100644 index 5c73fc3c7..000000000 --- a/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_doi2.json +++ /dev/null @@ -1,37 +0,0 @@ -{ - "id": "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f", - "instance": [ - { - "collectedfrom": { - "key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2", - "value": "Crossref" - }, - "pid": [ - { - "qualifier": {"classid": "doi"}, - "value": "10.1016/j.cmet.2010.03.013" - } - ] - }, - { - "collectedfrom": { - "key": "10|opendoar____::8b6dd7db9af49e67306feb59a8bdc52c", - "value": "Europe PubMed Central" - }, - "pid": [ - { - "qualifier": {"classid": "urn"}, - "value": "urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2" - }, - { - "qualifier": {"classid": "scp-number"}, - "value": "79953761260" - }, - { - "qualifier": {"classid": "pmc"}, - "value": "21459329" - } - ] - } - ] -} \ No newline at end of file diff --git a/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_doi3.json b/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_doi3.json deleted file mode 100644 index 97c40d4bb..000000000 --- a/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_doi3.json +++ /dev/null @@ -1,37 +0,0 @@ -{ - "id": "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f", - "instance": [ - { - "collectedfrom": { - "key": "10|openaire____::1234", - "value": "Zenodo" - }, - "pid": [ - { - "qualifier": {"classid": "doi"}, - "value": "10.1016/j.cmet.2010.03.013" - } - ] - }, - { - "collectedfrom": { - "key": "10|opendoar____::8b6dd7db9af49e67306feb59a8bdc52c", - "value": "Europe PubMed Central" - }, - "pid": [ - { - "qualifier": {"classid": "urn"}, - "value": "urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2" - }, - { - "qualifier": {"classid": "scp-number"}, - "value": "79953761260" - }, - { - "qualifier": {"classid": "pmc"}, - "value": "21459329" - } - ] - } - ] -} \ No newline at end of file diff --git a/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_doi4.json b/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_doi4.json deleted file mode 100644 index ac99ca93a..000000000 --- a/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_doi4.json +++ /dev/null @@ -1,37 +0,0 @@ -{ - "id": "50|od______2852::38861c44e6052a8d49f59a4c39ba5e66", - "instance": [ - { - "collectedfrom": { - "key": "10|openaire____::1234", - "value": "Zenodo" - }, - "pid": [ - { - "qualifier": {"classid": "doi"}, - "value": "10.1016/j.cmet.2010.03.013" - }, - { - "qualifier": {"classid": "handle"}, - "value": "11012/83840" - } - ] - }, - { - "collectedfrom": { - "key": "10|opendoar____::2852", - "value": "Digital library of Brno University of Technology" - }, - "pid": [ - { - "qualifier": {"classid": "pmc"}, - "value": "21459329" - }, - { - "qualifier": {"classid": "handle"}, - "value": "11012/83840" - } - ] - } - ] -} \ No newline at end of file diff --git a/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_pmc1.json b/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_pmc1.json deleted file mode 100644 index 3e4ba2246..000000000 --- a/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_pmc1.json +++ /dev/null @@ -1 +0,0 @@ -{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f","pid":[{"qualifier":{"classid":"urn"},"value":"urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"},{"qualifier":{"classid":"scp-number"},"value":"79953761260"},{"qualifier":{"classid":"pmc"},"value":"21459329"}]} \ No newline at end of file diff --git a/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_pmc2.json b/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_pmc2.json deleted file mode 100644 index e7d49eebb..000000000 --- a/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_pmc2.json +++ /dev/null @@ -1,21 +0,0 @@ -{ - "id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f", - "instance": [ - { - "collectedfrom": { - "key": "10|opendoar____::8b6dd7db9af49e67306feb59a8bdc52c", - "value": "Europe PubMed Central" - }, - "pid": [ - { - "qualifier": {"classid": "doi"}, - "value": "10.1016/j.cmet.2010.03.013" - }, - { - "qualifier":{"classid":"pmc"}, - "value":"21459329" - } - ] - } - ] -} \ No newline at end of file diff --git a/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_urn1.json b/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_urn1.json deleted file mode 100644 index 5323ac8bd..000000000 --- a/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_urn1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "id": "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f", - "pid": [ - { - "qualifier": { - "classid": "urn" - }, - "value": "urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2" - }, - { - "qualifier": { - "classid": "scp-number" - }, - "value": "79953761260" - }, - { - "qualifier": { - "classid": "pmcid" - }, - "value": "21459329" - } - ] -} \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/DataciteToOAFTransformation.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/DataciteToOAFTransformation.scala index 895e3261e..979ab4371 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/DataciteToOAFTransformation.scala +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/DataciteToOAFTransformation.scala @@ -4,10 +4,10 @@ import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup import eu.dnetlib.dhp.schema.action.AtomicAction import eu.dnetlib.dhp.schema.common.ModelConstants -import eu.dnetlib.dhp.schema.oaf.{Author, DataInfo, Instance, KeyValue, Oaf, OafMapperUtils, OtherResearchProduct, Publication, Qualifier, Relation, Result, Software, StructuredProperty, Dataset => OafDataset} +import eu.dnetlib.dhp.schema.oaf.{Author, DataInfo, Instance, KeyValue, Oaf, OtherResearchProduct, Publication, Qualifier, Relation, Result, Software, StructuredProperty, Dataset => OafDataset} import eu.dnetlib.dhp.schema.common.ModelConstants -import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory -import eu.dnetlib.dhp.schema.oaf.{AccessRight, Author, DataInfo, Instance, KeyValue, Oaf, OafMapperUtils, OtherResearchProduct, Publication, Qualifier, Relation, Result, Software, StructuredProperty, Dataset => OafDataset} +import eu.dnetlib.dhp.schema.oaf.utils.{IdentifierFactory, OafMapperUtils} +import eu.dnetlib.dhp.schema.oaf.{AccessRight, Author, DataInfo, Instance, KeyValue, Oaf, OtherResearchProduct, Publication, Qualifier, Relation, Result, Software, StructuredProperty, Dataset => OafDataset} import eu.dnetlib.dhp.utils.DHPUtils import org.apache.commons.lang3.StringUtils import org.json4s.DefaultFormats diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/rest/RestIterator.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/rest/RestIterator.java index e0925b1c5..16d509aa8 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/rest/RestIterator.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/rest/RestIterator.java @@ -1,26 +1,6 @@ package eu.dnetlib.dhp.collection.plugin.rest; -import eu.dnetlib.dhp.collection.CollectorException; -import eu.dnetlib.dhp.collection.HttpClientParams; -import eu.dnetlib.dhp.collection.JsonUtils; -import org.apache.commons.io.IOUtils; -import org.apache.commons.lang3.StringUtils; -import org.apache.http.HttpHeaders; -import org.apache.http.entity.ContentType; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.w3c.dom.Node; -import org.w3c.dom.NodeList; -import org.xml.sax.InputSource; - -import javax.xml.transform.OutputKeys; -import javax.xml.transform.Transformer; -import javax.xml.transform.TransformerConfigurationException; -import javax.xml.transform.TransformerFactory; -import javax.xml.transform.dom.DOMSource; -import javax.xml.transform.stream.StreamResult; -import javax.xml.xpath.*; import java.io.InputStream; import java.io.StringWriter; import java.io.UnsupportedEncodingException; @@ -32,6 +12,28 @@ import java.util.Iterator; import java.util.Queue; import java.util.concurrent.PriorityBlockingQueue; +import javax.xml.transform.OutputKeys; +import javax.xml.transform.Transformer; +import javax.xml.transform.TransformerConfigurationException; +import javax.xml.transform.TransformerFactory; +import javax.xml.transform.dom.DOMSource; +import javax.xml.transform.stream.StreamResult; +import javax.xml.xpath.*; + +import org.apache.commons.io.IOUtils; +import org.apache.commons.lang3.StringUtils; +import org.apache.http.HttpHeaders; +import org.apache.http.entity.ContentType; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.w3c.dom.Node; +import org.w3c.dom.NodeList; +import org.xml.sax.InputSource; + +import eu.dnetlib.dhp.collection.CollectorException; +import eu.dnetlib.dhp.collection.HttpClientParams; +import eu.dnetlib.dhp.collection.JsonUtils; + /** * log.info(...) equal to log.trace(...) in the application-logs *

diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/GroupEntitiesSparkJob.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/GroupEntitiesSparkJob.java index a3726d60a..80d25da4a 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/GroupEntitiesSparkJob.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/GroupEntitiesSparkJob.java @@ -32,6 +32,7 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.common.HdfsSupport; import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; import scala.Tuple2; /** diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCopyOpenorgsMergeRels.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCopyOpenorgsMergeRels.java index bd330ba87..9d0f61007 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCopyOpenorgsMergeRels.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCopyOpenorgsMergeRels.java @@ -2,15 +2,11 @@ package eu.dnetlib.dhp.oa.dedup; import java.io.IOException; -import java.util.ArrayList; -import java.util.List; import java.util.Optional; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; -import org.apache.spark.api.java.function.MapFunction; -import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SaveMode; import org.apache.spark.sql.SparkSession; @@ -18,20 +14,12 @@ import org.dom4j.DocumentException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.sun.media.sound.ModelChannelMixer; - import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.oaf.DataInfo; -import eu.dnetlib.dhp.schema.oaf.KeyValue; -import eu.dnetlib.dhp.schema.oaf.Qualifier; import eu.dnetlib.dhp.schema.oaf.Relation; -import eu.dnetlib.dhp.utils.DHPUtils; import eu.dnetlib.dhp.utils.ISLookupClientFactory; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; -import eu.dnetlib.pace.config.DedupConfig; -import net.sf.saxon.ma.trie.Tuple2; public class SparkCopyOpenorgsMergeRels extends AbstractSparkAction { private static final Logger log = LoggerFactory.getLogger(SparkCopyOpenorgsMergeRels.class); diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/model/Identifier.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/model/Identifier.java index d7f0644fd..e821d7ef5 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/model/Identifier.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/model/Identifier.java @@ -15,6 +15,7 @@ import eu.dnetlib.dhp.schema.common.EntityType; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; import eu.dnetlib.dhp.schema.oaf.utils.PidComparator; import eu.dnetlib.dhp.schema.oaf.utils.PidType; diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/IdGeneratorTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/IdGeneratorTest.java index 6b0b8dfa2..294d24031 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/IdGeneratorTest.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/IdGeneratorTest.java @@ -23,6 +23,7 @@ import com.google.common.collect.Lists; import eu.dnetlib.dhp.oa.dedup.model.Identifier; import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; import eu.dnetlib.pace.util.MapDocumentUtil; import scala.Tuple2; diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkOpenorgsDedupTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkOpenorgsDedupTest.java index 757ffcf05..8154a87ef 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkOpenorgsDedupTest.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkOpenorgsDedupTest.java @@ -3,7 +3,6 @@ package eu.dnetlib.dhp.oa.dedup; import static java.nio.file.Files.createTempDirectory; -import static org.apache.spark.sql.functions.count; import static org.junit.jupiter.api.Assertions.*; import static org.mockito.Mockito.lenient; @@ -23,21 +22,11 @@ import java.util.Properties; import org.apache.commons.io.FileUtils; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; -import org.apache.spark.api.java.JavaPairRDD; -import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; -import org.apache.spark.api.java.function.FilterFunction; -import org.apache.spark.api.java.function.ForeachFunction; -import org.apache.spark.api.java.function.MapFunction; -import org.apache.spark.api.java.function.PairFunction; -import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; -import org.apache.spark.sql.Row; import org.apache.spark.sql.SparkSession; -import org.apache.spark.util.CollectionsUtils; import org.junit.jupiter.api.*; import org.junit.jupiter.api.extension.ExtendWith; -import org.junit.platform.commons.util.StringUtils; import org.mockito.Mock; import org.mockito.Mockito; import org.mockito.junit.jupiter.MockitoExtension; @@ -46,14 +35,10 @@ import com.fasterxml.jackson.databind.DeserializationFeature; import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.schema.oaf.OafMapperUtils; -import eu.dnetlib.dhp.schema.oaf.Organization; import eu.dnetlib.dhp.schema.oaf.Relation; -import eu.dnetlib.dhp.utils.DHPUtils; +import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; -import eu.dnetlib.pace.util.MapDocumentUtil; -import scala.Tuple2; @ExtendWith(MockitoExtension.class) @TestMethodOrder(MethodOrderer.OrderAnnotation.class) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java index 050f4d327..ff46c3383 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java @@ -18,6 +18,7 @@ import com.google.gson.*; import eu.dnetlib.dhp.common.PacePerson; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; import eu.dnetlib.dhp.utils.DHPUtils; import eu.dnetlib.doiboost.orcidnodoi.util.DumpToActionsUtility; import eu.dnetlib.doiboost.orcidnodoi.util.Pair; diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleanGraphSparkJob.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleanGraphSparkJob.java index 088539325..262a352b0 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleanGraphSparkJob.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleanGraphSparkJob.java @@ -2,12 +2,11 @@ package eu.dnetlib.dhp.oa.graph.clean; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; -import static eu.dnetlib.dhp.schema.oaf.CleaningFunctions.*; +import static eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions.*; import java.util.Optional; import org.apache.commons.io.IOUtils; -import org.apache.commons.lang3.StringUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.FilterFunction; import org.apache.spark.api.java.function.MapFunction; diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java index 77d47b285..ad256a3c5 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java @@ -2,7 +2,7 @@ package eu.dnetlib.dhp.oa.graph.raw; import static eu.dnetlib.dhp.schema.common.ModelConstants.*; -import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.*; +import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.*; import java.util.*; import java.util.stream.Collectors; @@ -19,6 +19,7 @@ import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; +import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; public abstract class AbstractMdRecordToOafMapper { diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java index c0d70c0ea..4d18f7cad 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java @@ -2,15 +2,7 @@ package eu.dnetlib.dhp.oa.graph.raw; import static eu.dnetlib.dhp.schema.common.ModelConstants.*; -import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.asString; -import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.createOpenaireId; -import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.dataInfo; -import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.field; -import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.journal; -import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.listFields; -import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.listKeyValues; -import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.qualifier; -import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.structuredProperty; +import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.*; import java.io.Closeable; import java.io.IOException; diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java index ea9394890..789f8a42b 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java @@ -2,7 +2,7 @@ package eu.dnetlib.dhp.oa.graph.raw; import static eu.dnetlib.dhp.schema.common.ModelConstants.*; -import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.*; +import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.*; import java.util.ArrayList; import java.util.HashSet; @@ -20,8 +20,9 @@ import com.google.common.collect.Lists; import eu.dnetlib.dhp.common.PacePerson; import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; import eu.dnetlib.dhp.schema.oaf.*; -import eu.dnetlib.dhp.schema.oaf.CleaningFunctions; +import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions; import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; +import eu.dnetlib.dhp.schema.oaf.utils.ModelHardLimits; public class OafToOafMapper extends AbstractMdRecordToOafMapper { diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java index 116ec6202..4624f171b 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java @@ -2,7 +2,7 @@ package eu.dnetlib.dhp.oa.graph.raw; import static eu.dnetlib.dhp.schema.common.ModelConstants.*; -import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.*; +import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.*; import java.util.*; import java.util.stream.Collectors; @@ -13,8 +13,8 @@ import org.dom4j.Node; import eu.dnetlib.dhp.common.PacePerson; import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; -import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions; import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; public class OdfToOafMapper extends AbstractMdRecordToOafMapper { diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/CleaningFunctionTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/CleaningFunctionTest.java index 15cb054ad..8a4fbaafb 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/CleaningFunctionTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/CleaningFunctionTest.java @@ -21,6 +21,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplicationTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplicationTest.java index 2e46f0f50..d5e48190f 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplicationTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplicationTest.java @@ -19,6 +19,7 @@ import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; import eu.dnetlib.dhp.oa.graph.clean.CleaningFunctionTest; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplicationTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplicationTest.java index 31ce3aaa6..52a909f76 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplicationTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplicationTest.java @@ -30,6 +30,7 @@ import com.google.common.collect.Lists; import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; @ExtendWith(MockitoExtension.class) public class MigrateDbEntitiesApplicationTest { diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase1.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase1.java index d0c379d9e..7534ce4bd 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase1.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase1.java @@ -28,6 +28,7 @@ import eu.dnetlib.dhp.oa.provision.model.RelatedEntity; import eu.dnetlib.dhp.oa.provision.model.RelatedEntityWrapper; import eu.dnetlib.dhp.schema.common.EntityType; import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.utils.ModelHardLimits; import scala.Tuple2; /** diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase2.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase2.java index 612e7db06..c013a2bf6 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase2.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase2.java @@ -30,6 +30,7 @@ import eu.dnetlib.dhp.oa.provision.model.RelatedEntityWrapper; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.utils.ModelHardLimits; import scala.Tuple2; import scala.collection.JavaConverters; import scala.collection.Seq;