wip: large refactoring

Claudio Atzori 2023-02-09 12:32:28 +01:00
parent d9c9482a5b
commit 934c1846f8
70 changed files with 1537 additions and 1418 deletions

View File

@@ -6,7 +6,7 @@
<parent>
<groupId>eu.dnetlib.dhp</groupId>
<artifactId>dhp-build</artifactId>
<version>1.2.5-SNAPSHOT</version>
<version>2.0.0-SNAPSHOT</version>
</parent>
<artifactId>dhp-build-assembly-resources</artifactId>

View File

@@ -6,7 +6,7 @@
<parent>
<groupId>eu.dnetlib.dhp</groupId>
<artifactId>dhp-build</artifactId>
<version>1.2.5-SNAPSHOT</version>
<version>2.0.0-SNAPSHOT</version>
</parent>
<artifactId>dhp-build-properties-maven-plugin</artifactId>

View File

@@ -4,7 +4,7 @@
<parent>
<groupId>eu.dnetlib.dhp</groupId>
<artifactId>dhp</artifactId>
<version>1.2.5-SNAPSHOT</version>
<version>2.0.0-SNAPSHOT</version>
</parent>
<artifactId>dhp-build</artifactId>
<packaging>pom</packaging>

View File

@@ -5,7 +5,7 @@
<parent>
<groupId>eu.dnetlib.dhp</groupId>
<artifactId>dhp</artifactId>
<version>1.2.5-SNAPSHOT</version>
<version>2.0.0-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>

View File

@@ -1,6 +1,8 @@
package eu.dnetlib.dhp.common.action;
import static eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory.*;
import java.io.BufferedWriter;
import java.io.IOException;
import java.io.OutputStreamWriter;
@@ -19,7 +21,6 @@ import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.common.DbClient;
import eu.dnetlib.dhp.common.action.model.MasterDuplicate;
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
public class ReadDatasourceMasterDuplicateFromDB {
@@ -59,8 +60,8 @@ public class ReadDatasourceMasterDuplicateFromDB {
final String masterId = rs.getString("masterId");
final String masterName = rs.getString("masterName");
md.setDuplicateId(OafMapperUtils.createOpenaireId(10, duplicateId, true));
md.setMasterId(OafMapperUtils.createOpenaireId(10, masterId, true));
md.setDuplicateId(createOpenaireId(10, duplicateId, true));
md.setMasterId(createOpenaireId(10, masterId, true));
md.setMasterName(masterName);
return md;
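
A hedged aside (not part of the commit): given the createOpenaireId implementation shown below in IdentifierFactory, the ids built here follow the pattern prefix|nsPrefix::md5(rest). A minimal illustration, with "opendoar____::2659" as a made-up original id:

import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory;

public class OpenaireIdExample {
    public static void main(String[] args) {
        // prefix 10 marks a datasource id; the namespace prefix before "::"
        // is kept verbatim, only the local part after "::" is md5-hashed.
        String id = IdentifierFactory.createOpenaireId(10, "opendoar____::2659", true);
        System.out.println(id); // 10|opendoar____::<md5 hex of "2659">
    }
}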

View File

@@ -121,10 +121,12 @@ public class AuthorMerger {
}
public static String pidToComparableString(StructuredProperty pid) {
final String classid = pid.getQualifier().getClassid() != null ? pid.getQualifier().getClassid().toLowerCase()
: "";
return (pid.getQualifier() != null ? classid : "")
+ (pid.getValue() != null ? pid.getValue().toLowerCase() : "");
return pid.toComparableString();
/*
* final String classid = pid.getQualifier().getClassid() != null ?
* pid.getQualifier().getClassid().toLowerCase() : ""; return (pid.getQualifier() != null ? classid : "") +
* (pid.getValue() != null ? pid.getValue().toLowerCase() : "");
*/
}
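
A hedged sketch of the StructuredProperty.toComparableString() this method now delegates to, assuming it mirrors the logic preserved in the comment above (the real implementation lives in the schema module):

// Sketch only: plausible body of StructuredProperty.toComparableString().
public String toComparableString() {
    final String classid = getQualifier() != null && getQualifier().getClassid() != null
        ? getQualifier().getClassid().toLowerCase()
        : "";
    final String value = getValue() != null ? getValue().toLowerCase() : "";
    return classid + value;
}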
public static int countAuthorsPids(List<Author> authors) {

View File

@@ -10,8 +10,6 @@ import java.util.Objects;
import java.util.Optional;
import java.util.stream.Collectors;
import eu.dnetlib.dhp.schema.oaf.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.utils.MergeUtils;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.spark.SparkConf;
@@ -33,6 +31,8 @@ import com.jayway.jsonpath.Option;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.HdfsSupport;
import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.schema.oaf.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.utils.MergeUtils;
import scala.Tuple2;
/**
@@ -120,7 +120,7 @@ public class GroupEntitiesSparkJob {
private Entity mergeAndGet(Entity b, Entity a) {
if (Objects.nonNull(a) && Objects.nonNull(b)) {
return MergeUtils.mergeEntities(b, a);
return MergeUtils.merge(b, a);
}
return Objects.isNull(a) ? b : a;
}

View File

@@ -0,0 +1,252 @@
package eu.dnetlib.dhp.schema.common;
import eu.dnetlib.dhp.schema.oaf.AccessRight;
import eu.dnetlib.dhp.schema.oaf.KeyValue;
import eu.dnetlib.dhp.schema.oaf.Qualifier;
public class ModelConstants {
private ModelConstants() {}
public static final String ORCID = "orcid";
public static final String ORCID_PENDING = "orcid_pending";
public static final String ORCID_CLASSNAME = "Open Researcher and Contributor ID";
public static final String ORCID_DS = ORCID.toUpperCase();
public static final String CROSSREF_ID = "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2";
public static final String CROSSREF_NAME = "Crossref";
public static final String DATACITE_ID = "10|openaire____::9e3be59865b2c1c335d32dae2fe7b254";
public static final String ZENODO_OD_ID = "10|opendoar____::358aee4cc897452c00244351e4d91f69";
public static final String ZENODO_R3_ID = "10|re3data_____::7b0ad08687b2c960d5aeef06f811d5e6";
public static final String EUROPE_PUBMED_CENTRAL_ID = "10|opendoar____::8b6dd7db9af49e67306feb59a8bdc52c";
public static final String PUBMED_CENTRAL_ID = "10|opendoar____::eda80a3d5b344bc40f3bc04f65b7a357";
public static final String ARXIV_ID = "10|opendoar____::6f4922f45568161a8cdf4ad2299f6d23";
public static final String ROHUB_ID = "10|fairsharing_::1b69ebedb522700034547abc5652ffac";
public static final String OPENORGS_NAME = "OpenOrgs Database";
public static final String OPENOCITATIONS_NAME = "OpenCitations";
public static final String OPENOCITATIONS_ID = "10|openaire____::c06df618c5de1c786535ccf3f8b7b059";
public static final String OPEN_APC_NAME = "OpenAPC Global Initiative";
public static final String OPEN_APC_ID = "10|apc_________::e2b1600b229fc30663c8a1f662debddf";
// VOCABULARY VALUE
public static final String ACCESS_RIGHT_OPEN = "OPEN";
public static final String ACCESS_RIGHT_EMBARGO = "EMBARGO";
public static final String ACCESS_RIGHT_CLOSED = "CLOSED";
public static final String DNET_SUBJECT_KEYWORD = "keyword";
public static final String DNET_SUBJECT_FOS_CLASSID = "FOS";
public static final String DNET_SUBJECT_FOS_CLASSNAME = "Fields of Science and Technology classification";
public static final String DNET_SUBJECT_TYPOLOGIES = "dnet:subject_classification_typologies";
public static final String DNET_RESULT_TYPOLOGIES = "dnet:result_typologies";
public static final String DNET_PUBLICATION_RESOURCE = "dnet:publication_resource";
public static final String DNET_ACCESS_MODES = "dnet:access_modes";
public static final String DNET_LANGUAGES = "dnet:languages";
public static final String DNET_PID_TYPES = "dnet:pid_types";
public static final String DNET_DATACITE_DATE = "dnet:dataCite_date";
public static final String DNET_DATACITE_TITLE = "dnet:dataCite_title";
public static final String DNET_DATA_CITE_RESOURCE = "dnet:dataCite_resource";
public static final String DNET_PROVENANCE_ACTIONS = "dnet:provenanceActions";
public static final String DNET_COUNTRY_TYPE = "dnet:countries";
public static final String DNET_REVIEW_LEVELS = "dnet:review_levels";
public static final String DNET_PROGRAMMING_LANGUAGES = "dnet:programming_languages";
public static final String DNET_EXTERNAL_REFERENCE_TYPE = "dnet:externalReference_typologies";
public static final String DNET_RELATION_RELTYPE = "dnet:relation_relType";
public static final String DNET_RELATION_SUBRELTYPE = "dnet:relation_subRelType";
public static final String DNET_RELATION_RELCLASS = "dnet:relation_relClass";
public static final String PEER_REVIEWED_CLASSNAME = "peerReviewed";
public static final String NON_PEER_REVIEWED_CLASSNAME = "nonPeerReviewed";
public static final String PEER_REVIEWED_CLASSID = "0001";
public static final String NON_PEER_REVIEWED_CLASSID = "0002";
public static final String SYSIMPORT_CROSSWALK_REPOSITORY = "sysimport:crosswalk:repository";
public static final String SYSIMPORT_CROSSWALK_ENTITYREGISTRY = "sysimport:crosswalk:entityregistry";
public static final String SYSIMPORT_ACTIONSET = "sysimport:actionset";
public static final String SYSIMPORT_ORCID_NO_DOI = "sysimport:actionset:orcidworks-no-doi";
public static final String USER_CLAIM = "user:claim";
public static final String HARVESTED = "Harvested";
public static final String PROVENANCE_DEDUP = "sysimport:dedup";
public static final String PROVENANCE_ENRICH = "sysimport:enrich";
public static final Qualifier PROVENANCE_ACTION_SET_QUALIFIER = qualifier(
SYSIMPORT_ACTIONSET, SYSIMPORT_ACTIONSET, DNET_PROVENANCE_ACTIONS);
public static final String DATASET_RESULTTYPE_CLASSID = "dataset";
public static final String PUBLICATION_RESULTTYPE_CLASSID = "publication";
public static final String SOFTWARE_RESULTTYPE_CLASSID = "software";
public static final String ORP_RESULTTYPE_CLASSID = "other";
public static final String RESULT_RESULT = "resultResult"; // relType
/**
* @deprecated Use {@link ModelConstants#RELATIONSHIP} instead.
*/
@Deprecated
public static final String PUBLICATION_DATASET = "publicationDataset"; // subreltype
public static final String SUPPLEMENT = "supplement"; // subreltype
public static final String IS_SUPPLEMENT_TO = "IsSupplementTo";
public static final String IS_SUPPLEMENTED_BY = "IsSupplementedBy";
public static final String PART = "part"; // subreltype
public static final String IS_PART_OF = "IsPartOf";
public static final String HAS_PART = "HasPart";
public static final String RELATIONSHIP = "relationship"; // subreltype
public static final String IS_RELATED_TO = "IsRelatedTo";
public static final String IS_IDENTICAL_TO = "IsIdenticalTo";
public static final String REFERENCES = "References";
public static final String IS_REFERENCED_BY = "IsReferencedBy";
public static final String CONTINUES = "Continues";
public static final String IS_CONTINUED_BY = "IsContinuedBy";
public static final String DOCUMENTS = "Documents";
public static final String IS_DOCUMENTED_BY = "IsDocumentedBy";
public static final String IS_SOURCE_OF = "IsSourceOf";
public static final String IS_DERIVED_FROM = "IsDerivedFrom";
public static final String COMPILES = "Compiles";
public static final String IS_COMPILED_BY = "IsCompiledBy";
public static final String DESCRIBES = "Describes";
public static final String IS_DESCRIBED_BY = "IsDescribedBy";
public static final String IS_METADATA_FOR = "IsMetadataFor";
public static final String IS_METADATA_OF = "IsMetadataOf";
public static final String HAS_ASSOCIATION_WITH = "HasAssociationWith";
public static final String IS_REQUIRED_BY = "IsRequiredBy";
public static final String REQUIRES = "Requires";
public static final String CITATION = "citation"; // subreltype
public static final String CITES = "Cites";
public static final String IS_CITED_BY = "IsCitedBy";
public static final String REVIEW = "review"; // subreltype
public static final String REVIEWS = "Reviews";
public static final String IS_REVIEWED_BY = "IsReviewedBy";
public static final String VERSION = "version"; // subreltype
public static final String IS_VERSION_OF = "IsVersionOf";
public static final String HAS_VERSION = "HasVersion";
public static final String IS_PREVIOUS_VERSION_OF = "IsPreviousVersionOf";
public static final String IS_NEW_VERSION_OF = "IsNewVersionOf";
public static final String IS_VARIANT_FORM_OF = "IsVariantFormOf";
public static final String IS_ORIGINAL_FORM_OF = "IsOriginalFormOf";
public static final String IS_OBSOLETED_BY = "IsObsoletedBy";
public static final String OBSOLETES = "Obsoletes";
public static final String RESULT_PROJECT = "resultProject"; // relType
public static final String OUTCOME = "outcome"; // subreltype
public static final String IS_PRODUCED_BY = "isProducedBy";
public static final String PRODUCES = "produces";
public static final String DATASOURCE_ORGANIZATION = "datasourceOrganization"; // relType
public static final String PROVISION = "provision"; // subreltype
public static final String IS_PROVIDED_BY = "isProvidedBy";
public static final String PROVIDES = "provides";
public static final String PROJECT_ORGANIZATION = "projectOrganization"; // relType
public static final String PARTICIPATION = "participation"; // subreltype
public static final String HAS_PARTICIPANT = "hasParticipant";
public static final String IS_PARTICIPANT = "isParticipant";
public static final String RESULT_ORGANIZATION = "resultOrganization"; // relType
public static final String AFFILIATION = "affiliation"; // subreltype
public static final String IS_AUTHOR_INSTITUTION_OF = "isAuthorInstitutionOf";
public static final String HAS_AUTHOR_INSTITUTION = "hasAuthorInstitution";
public static final String ORG_ORG_RELTYPE = "organizationOrganization"; // relType
public static final String IS_PARENT_OF = "IsParentOf";
public static final String IS_CHILD_OF = "IsChildOf";
public static final String DEDUP = "dedup"; // subreltype
public static final String MERGES = "merges";
public static final String IS_MERGED_IN = "isMergedIn";
public static final String SIMILARITY = "similarity"; // subreltype
public static final String IS_SIMILAR_TO = "isSimilarTo";
public static final String IS_AMONG_TOP_N_SIMILAR_DOCS = "IsAmongTopNSimilarDocuments";
public static final String HAS_AMONG_TOP_N_SIMILAR_DOCS = "HasAmongTopNSimilarDocuments";
public static final String IS_DIFFERENT_FROM = "isDifferentFrom";
public static final String UNKNOWN = "UNKNOWN";
public static final String NOT_AVAILABLE = "not available";
public static final Qualifier PUBLICATION_DEFAULT_RESULTTYPE = qualifier(
PUBLICATION_RESULTTYPE_CLASSID, PUBLICATION_RESULTTYPE_CLASSID,
DNET_RESULT_TYPOLOGIES);
public static final Qualifier DATASET_DEFAULT_RESULTTYPE = qualifier(
DATASET_RESULTTYPE_CLASSID, DATASET_RESULTTYPE_CLASSID,
DNET_RESULT_TYPOLOGIES);
public static final Qualifier SOFTWARE_DEFAULT_RESULTTYPE = qualifier(
SOFTWARE_RESULTTYPE_CLASSID, SOFTWARE_RESULTTYPE_CLASSID,
DNET_RESULT_TYPOLOGIES);
public static final Qualifier ORP_DEFAULT_RESULTTYPE = qualifier(
ORP_RESULTTYPE_CLASSID, ORP_RESULTTYPE_CLASSID,
DNET_RESULT_TYPOLOGIES);
public static final Qualifier REPOSITORY_PROVENANCE_ACTIONS = qualifier(
SYSIMPORT_CROSSWALK_REPOSITORY, SYSIMPORT_CROSSWALK_REPOSITORY,
DNET_PROVENANCE_ACTIONS);
public static final Qualifier ENTITYREGISTRY_PROVENANCE_ACTION = qualifier(
SYSIMPORT_CROSSWALK_ENTITYREGISTRY, SYSIMPORT_CROSSWALK_ENTITYREGISTRY,
DNET_PROVENANCE_ACTIONS);
public static final String UNKNOWN_REPOSITORY_ORIGINALID = "openaire____::1256f046-bf1f-4afc-8b47-d0b147148b18";
public static final KeyValue UNKNOWN_REPOSITORY = keyValue(
"10|openaire____::55045bd2a65019fd8e6741a755395c8c", "Unknown Repository");
public static final Qualifier UNKNOWN_COUNTRY = qualifier(UNKNOWN, "Unknown", DNET_COUNTRY_TYPE);
public static final Qualifier MAIN_TITLE_QUALIFIER = qualifier(
"main title", "main title", DNET_DATACITE_TITLE);
public static final Qualifier ALTERNATIVE_TITLE_QUALIFIER = qualifier(
"alternative title", "alternative title", DNET_DATACITE_TITLE);
private static final Qualifier SUBTITLE_QUALIFIER = qualifier("subtitle", "subtitle", DNET_DATACITE_TITLE);
public static final AccessRight OPEN_ACCESS_RIGHT() {
final AccessRight result = new AccessRight();
result.setClassid(ACCESS_RIGHT_OPEN);
result.setClassname(ACCESS_RIGHT_OPEN);
result.setSchemeid(ModelConstants.DNET_ACCESS_MODES);
return result;
}
private static Qualifier qualifier(
final String classid,
final String classname,
final String schemeid) {
final Qualifier q = new Qualifier();
q.setClassid(classid);
q.setClassname(classname);
q.setSchemeid(schemeid);
return q;
}
private static KeyValue keyValue(final String key, final String value) {
final KeyValue kv = new KeyValue();
kv.setKey(key);
kv.setValue(value);
return kv;
}
}
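
A hedged usage sketch (not part of the commit) showing the constants above in consuming code; the StructuredProperty setters follow the pattern visible in OafUtils further below, and the getters are assumed:

import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.AccessRight;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;

public class ModelConstantsExample {
    public static void main(String[] args) {
        // open access right built from the factory method above
        AccessRight open = ModelConstants.OPEN_ACCESS_RIGHT();
        System.out.println(open.getClassid()); // OPEN

        // a main title qualified in the dnet:dataCite_title scheme
        StructuredProperty title = new StructuredProperty();
        title.setQualifier(ModelConstants.MAIN_TITLE_QUALIFIER);
        title.setValue("An illustrative main title");
    }
}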

View File

@@ -1,10 +1,10 @@
package eu.dnetlib.dhp.schema.oaf.common;
import eu.dnetlib.dhp.schema.oaf.Qualifier;
import java.util.Comparator;
import eu.dnetlib.dhp.schema.oaf.Qualifier;
public class AccessRightComparator<T extends Qualifier> implements Comparator<T> {
@Override

View File

@@ -1,12 +1,8 @@
package eu.dnetlib.dhp.schema.oaf.common;
import com.github.sisyphsu.dateparser.DateParserUtils;
import com.google.common.collect.Maps;
import eu.dnetlib.dhp.schema.oaf.*;
import org.apache.commons.codec.binary.Hex;
import org.apache.commons.lang3.StringUtils;
import static com.google.common.base.Preconditions.checkArgument;
import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
@@ -18,8 +14,13 @@ import java.util.Objects;
import java.util.Optional;
import java.util.function.Function;
import static com.google.common.base.Preconditions.checkArgument;
import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
import org.apache.commons.codec.binary.Hex;
import org.apache.commons.lang3.StringUtils;
import com.github.sisyphsu.dateparser.DateParserUtils;
import com.google.common.collect.Maps;
import eu.dnetlib.dhp.schema.oaf.*;
/** Oaf model utility methods. */
public class ModelSupport {
@@ -129,7 +130,6 @@ public class ModelSupport {
set(relationInverseMap, RESULT_RESULT, RELATIONSHIP, HAS_ASSOCIATION_WITH, HAS_ASSOCIATION_WITH);
set(relationInverseMap, RESULT_RESULT, RELATIONSHIP, IS_REQUIRED_BY, REQUIRES);
set(relationInverseMap, RESULT_RESULT, VERSION, IS_PREVIOUS_VERSION_OF, IS_NEW_VERSION_OF);
set(relationInverseMap, RESULT_RESULT, VERSION, IS_VARIANT_FORM_OF, IS_ORIGINAL_FORM_OF);
set(relationInverseMap, RESULT_RESULT, VERSION, IS_OBSOLETED_BY, OBSOLETES);
@@ -138,22 +138,23 @@
set(relationInverseMap, RESULT_RESULT, REVIEW, IS_REVIEWED_BY, REVIEWS);
}
private static void set(Map<String, RelationInverse> relationInverseMap, String relType, String subRelType, String relClass, String inverseRelClass) {
private static void set(Map<String, RelationInverse> relationInverseMap, String relType, String subRelType,
String relClass, String inverseRelClass) {
relationInverseMap
.put(
rel(relType, subRelType, relClass), new RelationInverse()
.setInverseRelClass(inverseRelClass)
.setRelClass(relClass)
.setRelType(relType)
.setSubReltype(subRelType));
.put(
rel(relType, subRelType, relClass), new RelationInverse()
.setInverseRelClass(inverseRelClass)
.setRelClass(relClass)
.setRelType(relType)
.setSubReltype(subRelType));
if (!relClass.equals(inverseRelClass)) {
relationInverseMap
.put(
rel(relType, subRelType, inverseRelClass), new RelationInverse()
.setInverseRelClass(relClass)
.setRelClass(inverseRelClass)
.setRelType(relType)
.setSubReltype(subRelType));
.put(
rel(relType, subRelType, inverseRelClass), new RelationInverse()
.setInverseRelClass(relClass)
.setRelClass(inverseRelClass)
.setRelType(relType)
.setSubReltype(subRelType));
}
}
@@ -164,25 +165,26 @@
*/
public static RelationInverse findInverse(String encoding) {
return ModelSupport.relationInverseMap
.entrySet()
.stream()
.filter(r -> encoding.equalsIgnoreCase(r.getKey()))
.findFirst()
.map(r -> r.getValue())
.orElseThrow(() -> new IllegalArgumentException("invalid relationship: " + encoding));
.entrySet()
.stream()
.filter(r -> encoding.equalsIgnoreCase(r.getKey()))
.findFirst()
.map(r -> r.getValue())
.orElseThrow(() -> new IllegalArgumentException("invalid relationship: " + encoding));
}
/**
* Helper method: find a relation filtering by a relation name
* @param relationName
* @param relationName
* @return
*/
public static RelationInverse findRelation(final String relationName) {
return relationInverseMap.values()
.stream()
.filter(r -> relationName.equalsIgnoreCase(r.getRelClass()))
.findFirst()
.orElse(null);
return relationInverseMap
.values()
.stream()
.filter(r -> relationName.equalsIgnoreCase(r.getRelClass()))
.findFirst()
.orElse(null);
}
/**
@@ -207,6 +209,10 @@
return idPrefixMap.get(clazz);
}
public static <X extends Oaf, Y extends Oaf, Z extends Oaf> Boolean sameClass(X left, Y right, Class<Z> superClazz) {
return isSubClass(left, superClazz) && isSubClass(right, superClazz);
}
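
A hedged aside: with the mappings above, the inverse lookup works by relation class name, ignoring case (as the ModelSupportTest change below exercises). A sketch, assuming RelationInverse exposes getters symmetric to the setters shown:

// Sketch only: per set(..., REVIEW, IS_REVIEWED_BY, REVIEWS) above.
RelationInverse ri = ModelSupport.findRelation("isreviewedby"); // case-insensitive match
String inverse = ri.getInverseRelClass(); // "Reviews"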
/**
* Checks subclass-superclass relationship.
*

View File

@@ -1,11 +1,11 @@
package eu.dnetlib.dhp.schema.oaf.common;
import java.util.Comparator;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.Qualifier;
import java.util.Comparator;
public class RefereedComparator implements Comparator<Qualifier> {
@Override

View File

@@ -1,16 +1,7 @@
package eu.dnetlib.dhp.schema.oaf.utils;
import com.github.sisyphsu.dateparser.DateParserUtils;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.schema.oaf.common.ModelSupport;
import me.xuender.unidecode.Unidecode;
import org.apache.commons.lang3.StringUtils;
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.getProvenance;
import java.time.LocalDate;
import java.time.ZoneId;
@@ -21,7 +12,17 @@ import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.getProvenance;
import org.apache.commons.lang3.StringUtils;
import com.github.sisyphsu.dateparser.DateParserUtils;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.schema.oaf.common.ModelSupport;
import me.xuender.unidecode.Unidecode;
public class GraphCleaningFunctions extends CleaningFunctions {

View File

@@ -12,7 +12,6 @@ import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import eu.dnetlib.dhp.schema.oaf.common.ModelSupport;
import org.apache.commons.codec.binary.Hex;
import org.apache.commons.lang3.StringUtils;
@@ -20,6 +19,7 @@ import com.google.common.collect.HashBiMap;
import com.google.common.collect.Maps;
import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.schema.oaf.common.ModelSupport;
/**
* Factory class for OpenAIRE identifiers in the Graph
@@ -268,7 +268,7 @@ public class IdentifierFactory implements Serializable {
.append(ID_PREFIX_SEPARATOR)
.append(createPrefix(pidType))
.append(ID_SEPARATOR)
.append(md5 ? md5(pidValue) : pidValue)
.append(md5 ? ModelSupport.md5(pidValue) : pidValue)
.toString();
}
@@ -281,13 +281,36 @@
return prefix.substring(0, ID_PREFIX_LEN);
}
public static String md5(final String s) {
try {
final MessageDigest md = MessageDigest.getInstance("MD5");
md.update(s.getBytes(StandardCharsets.UTF_8));
return new String(Hex.encodeHex(md.digest()));
} catch (final Exception e) {
public static String createOpenaireId(
final int prefix,
final String originalId,
final boolean to_md5) {
if (StringUtils.isBlank(originalId)) {
return null;
} else if (to_md5) {
final String nsPrefix = StringUtils.substringBefore(originalId, "::");
final String rest = StringUtils.substringAfter(originalId, "::");
return String.format("%s|%s::%s", prefix, nsPrefix, ModelSupport.md5(rest));
} else {
return String.format("%s|%s", prefix, originalId);
}
}
public static String createOpenaireId(
final String type,
final String originalId,
final boolean to_md5) {
switch (type) {
case "datasource":
return createOpenaireId(10, originalId, to_md5);
case "organization":
return createOpenaireId(20, originalId, to_md5);
case "person":
return createOpenaireId(30, originalId, to_md5);
case "project":
return createOpenaireId(40, originalId, to_md5);
default:
return createOpenaireId(50, originalId, to_md5);
}
}
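
The md5 helper removed above is now referenced as ModelSupport.md5; a sketch assuming it keeps the removed body (the original catch block is truncated here, so the rethrow below is an assumption):

import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import org.apache.commons.codec.binary.Hex;

// Sketch of ModelSupport.md5, mirroring the implementation removed above.
public static String md5(final String s) {
    try {
        final MessageDigest md = MessageDigest.getInstance("MD5");
        md.update(s.getBytes(StandardCharsets.UTF_8));
        return new String(Hex.encodeHex(md.digest()));
    } catch (final Exception e) {
        throw new IllegalStateException(e); // assumption: original handling is truncated
    }
}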

View File

@@ -1,156 +0,0 @@
package eu.dnetlib.dhp.schema.oaf.utils;
import java.lang.reflect.Field;
import java.util.Collection;
import java.util.Iterator;
public class MergeUtils2 {
/**
* Recursively merges the fields of the provider into the receiver.
*
* @param receiver the receiver instance.
* @param provider the provider instance.
*/
public static <T> void merge(final T receiver, final T provider) {
Field[] fields = receiver.getClass().getDeclaredFields();
for (Field field : fields) {
try {
field.setAccessible(true);
Object receiverObject = field.get(receiver);
Object providerObject = field.get(provider);
if (receiverObject == null || providerObject == null) {
/* One is null */
field.set(receiver, providerObject);
} else if (field.getType().isAssignableFrom(Collection.class)) {
/* Collection field */
// noinspection rawtypes
mergeCollections((Collection) receiverObject, (Collection) providerObject);
} else if (field.getType().isPrimitive() || field.getType().isEnum()
|| field.getType().equals(String.class)) {
/* Primitive, Enum or String field */
field.set(receiver, providerObject);
} else {
/* Mergeable field */
merge(receiverObject, providerObject);
}
} catch (IllegalAccessException e) {
/* Should not happen */
throw new RuntimeException(e);
}
}
}
/**
* Recursively merges the items in the providers collection into the receivers collection.
* Receivers not present in providers will be removed, providers not present in receivers will be added.
* If the item has a field called 'id', this field will be compared to match the items.
*
* @param receivers the collection containing the receiver instances.
* @param providers the collection containing the provider instances.
*/
public static <T> void mergeCollections(final Collection<T> receivers, final Collection<T> providers) {
if (receivers.isEmpty() && providers.isEmpty()) {
return;
}
if (providers.isEmpty()) {
receivers.clear();
return;
}
if (receivers.isEmpty()) {
receivers.addAll(providers);
return;
}
Field idField;
try {
T t = providers.iterator().next();
idField = t.getClass().getDeclaredField("id");
idField.setAccessible(true);
} catch (NoSuchFieldException ignored) {
idField = null;
}
try {
if (idField != null) {
mergeCollectionsWithId(receivers, providers, idField);
} else {
mergeCollectionsSimple(receivers, providers);
}
} catch (IllegalAccessException e) {
/* Should not happen */
throw new RuntimeException(e);
}
}
/**
* Recursively merges the items in the collections for which the id's are equal.
*
* @param receivers the collection containing the receiver items.
* @param providers the collection containing the provider items.
* @param idField the id field.
*
* @throws IllegalAccessException if the id field is not accessible.
*/
private static <T> void mergeCollectionsWithId(final Collection<T> receivers, final Iterable<T> providers,
final Field idField) throws IllegalAccessException {
/* Find a receiver for each provider */
for (T provider : providers) {
boolean found = false;
for (T receiver : receivers) {
if (idField.get(receiver).equals(idField.get(provider))) {
merge(receiver, provider);
found = true;
}
}
if (!found) {
receivers.add(provider);
}
}
/* Remove receivers not in providers */
for (Iterator<T> iterator = receivers.iterator(); iterator.hasNext();) {
T receiver = iterator.next();
boolean found = false;
for (T provider : providers) {
if (idField.get(receiver).equals(idField.get(provider))) {
found = true;
}
}
if (!found) {
iterator.remove();
}
}
}
/**
* Recursively merges the items in the collections one by one. Disregards equality.
*
* @param receivers the collection containing the receiver items.
* @param providers the collection containing the provider items.
*/
private static <T> void mergeCollectionsSimple(final Collection<T> receivers, final Iterable<T> providers) {
Iterator<T> receiversIterator = receivers.iterator();
Iterator<T> providersIterator = providers.iterator();
while (receiversIterator.hasNext() && providersIterator.hasNext()) {
merge(receiversIterator.next(), providersIterator.next());
}
/* Remove excessive receivers if present */
while (receiversIterator.hasNext()) {
receiversIterator.next();
receiversIterator.remove();
}
/* Add residual providers to receivers if present */
while (providersIterator.hasNext()) {
receivers.add(providersIterator.next());
}
}
}
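
For the record, a minimal usage sketch of the reflective merge this deleted class implemented (Foo is an illustrative bean; note that the type test above uses field.getType().isAssignableFrom(Collection.class), so the collection branch only triggers when the declared field type is Collection itself):

import java.util.ArrayList;
import java.util.Collection;

class Foo {
    String name;
    Collection<String> tags = new ArrayList<>();
}

public class MergeUtils2Example {
    public static void main(String[] args) {
        Foo receiver = new Foo();
        receiver.name = "old";
        Foo provider = new Foo();
        provider.name = "new";
        provider.tags.add("a");
        // String fields take the provider value; the empty receiver
        // collection is filled with the provider items.
        MergeUtils2.merge(receiver, provider);
        System.out.println(receiver.name); // new
        System.out.println(receiver.tags); // [a]
    }
}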

View File

@@ -1,89 +0,0 @@
package eu.dnetlib.dhp.schema.oaf.utils;
import java.lang.reflect.Field;
import java.math.BigDecimal;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import static org.apache.commons.lang3.ClassUtils.isPrimitiveOrWrapper;
public class MergeUtils3 {
private final List<Object> selfObjects;
private final Object source;
private final Object target;
private MergeUtils3(Object source, Object target) {
this.source = source;
this.target = target;
this.selfObjects = new ArrayList<>();
}
public static MergeUtils3 mergerOf(Object source, Object target) {
return new MergeUtils3(source, target);
}
public final void merge() {
try {
merge(source, target);
} catch (IllegalAccessException | NoSuchFieldException e) {
throw new RuntimeException("Merge error: ", e);
}
}
private void merge(Object source, Object target) throws IllegalAccessException, NoSuchFieldException {
selfObjects.add(source);
Field[] declaredFields = source.getClass().getDeclaredFields();
for (Field declaredField : declaredFields) {
declaredField.setAccessible(true);
Object fieldValue = declaredField.get(source);
if (fieldValue == null || selfObjects.contains(fieldValue)) {
continue;
}
Class<?> declaredFieldType = declaredField.getType();
if (isJdkType(declaredField)) {
Field targetField = target.getClass().getDeclaredField(declaredField.getName());
targetField.setAccessible(true);
targetField.set(target, fieldValue);
continue;
}
if (Collection.class.isAssignableFrom(declaredFieldType)) {
Iterable sourceCollection = (Iterable) declaredField.get(source);
Iterable targetCollection = (Iterable) declaredField.get(target);
merge(sourceCollection, targetCollection);
continue;
}
merge(declaredField.get(source), declaredField.get(target));
}
}
private boolean isJdkType(Field field) {
Class<?> declaredFieldType = field.getType();
String fieldTypeName = declaredFieldType.getName();
return isPrimitiveOrWrapper(declaredFieldType)
|| fieldTypeName.equals(String.class.getName())
|| fieldTypeName.equals(BigDecimal.class.getName());
}
private void merge(Iterable source, Iterable target) throws NoSuchFieldException, IllegalAccessException {
Iterator sourceIterator = source.iterator();
Iterator targetIterator = target.iterator();
while (sourceIterator.hasNext()) {
merge(sourceIterator.next(), targetIterator.next());
}
}
}

View File

@@ -11,10 +11,10 @@ import java.util.function.Function;
import java.util.function.Predicate;
import java.util.stream.Collectors;
import eu.dnetlib.dhp.schema.oaf.common.AccessRightComparator;
import org.apache.commons.lang3.StringUtils;
import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.schema.oaf.common.AccessRightComparator;
public class OafMapperUtils {
@@ -208,8 +208,7 @@
final String name,
final String issnPrinted,
final String issnOnline,
final String issnLinking,
final DataInfo dataInfo) {
final String issnLinking) {
return hasIssn(issnPrinted, issnOnline, issnLinking) ? journal(
name,
@@ -222,8 +221,7 @@
null,
null,
null,
null,
dataInfo) : null;
null) : null;
}
public static Journal journal(
@@ -237,8 +235,7 @@
final String vol,
final String edition,
final String conferenceplace,
final String conferencedate,
final DataInfo dataInfo) {
final String conferencedate) {
if (StringUtils.isNotBlank(name) || hasIssn(issnPrinted, issnOnline, issnLinking)) {
final Journal j = new Journal();
@@ -253,7 +250,6 @@
j.setEdition(edition);
j.setConferenceplace(conferenceplace);
j.setConferencedate(conferencedate);
j.setDataInfo(dataInfo);
return j;
} else {
return null;
@@ -296,39 +292,6 @@
return d;
}
public static String createOpenaireId(
final int prefix,
final String originalId,
final boolean to_md5) {
if (StringUtils.isBlank(originalId)) {
return null;
} else if (to_md5) {
final String nsPrefix = StringUtils.substringBefore(originalId, "::");
final String rest = StringUtils.substringAfter(originalId, "::");
return String.format("%s|%s::%s", prefix, nsPrefix, IdentifierFactory.md5(rest));
} else {
return String.format("%s|%s", prefix, originalId);
}
}
public static String createOpenaireId(
final String type,
final String originalId,
final boolean to_md5) {
switch (type) {
case "datasource":
return createOpenaireId(10, originalId, to_md5);
case "organization":
return createOpenaireId(20, originalId, to_md5);
case "person":
return createOpenaireId(30, originalId, to_md5);
case "project":
return createOpenaireId(40, originalId, to_md5);
default:
return createOpenaireId(50, originalId, to_md5);
}
}
public static String asString(final Object o) {
return o == null ? "" : o.toString();
}
@@ -416,14 +379,14 @@
}
public static Relation getRelation(final String source,
final String target,
final String relType,
final String subRelType,
final String relClass,
final List<Provenance> provenance,
final List<KeyValue> properties) {
final String target,
final String relType,
final String subRelType,
final String relClass,
final List<Provenance> provenance,
final List<KeyValue> properties) {
return getRelation(
source, target, relType, subRelType, relClass, provenance, null, properties);
source, target, relType, subRelType, relClass, provenance, null, properties);
}
public static Relation getRelation(final String source,
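
A hedged aside on the signature change above: the journal helpers no longer take a DataInfo, so a call now looks like this (argument values are illustrative):

// Sketch only: the short journal() overload after the refactoring.
Journal j = OafMapperUtils.journal(
    "Journal of Examples", // name
    "1234-5678",           // issnPrinted
    null,                  // issnOnline
    null);                 // issnLinking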

View File

@@ -0,0 +1,59 @@
package eu.dnetlib.dhp.schema.sx
import eu.dnetlib.dhp.schema.common.ModelConstants
import eu.dnetlib.dhp.schema.oaf._
object OafUtils {
def generateKeyValue(key: String, value: String): KeyValue = {
val kv: KeyValue = new KeyValue()
kv.setKey(key)
kv.setValue(value)
kv
}
def generateDataInfo(trust: Float = 0.9f, invisible: Boolean = false): DataInfo = {
val di = new DataInfo
di.setInferred(false)
di.setTrust(trust)
di.setProvenanceaction(createQualifier(ModelConstants.SYSIMPORT_ACTIONSET, ModelConstants.DNET_PROVENANCE_ACTIONS))
di
}
def createQualifier(cls: String, sch: String): Qualifier = {
createQualifier(cls, cls, sch)
}
def createQualifier(classId: String, className: String, schemeId: String): Qualifier = {
val q: Qualifier = new Qualifier
q.setClassid(classId)
q.setClassname(className)
q.setSchemeid(schemeId)
q
}
def createAccessRight(classId: String, className: String, schemeId: String): AccessRight = {
val accessRight: AccessRight = new AccessRight
accessRight.setClassid(classId)
accessRight.setClassname(className)
accessRight.setSchemeid(schemeId)
accessRight
}
def createSP(value: String, classId: String, className: String, schemeId: String): StructuredProperty = {
val sp = new StructuredProperty
sp.setQualifier(createQualifier(classId, className, schemeId))
sp.setValue(value)
sp
}
def createSP(value: String, classId: String, schemeId: String): StructuredProperty = {
val sp = new StructuredProperty
sp.setQualifier(createQualifier(classId, schemeId))
sp.setValue(value)
sp
}
}

View File

@@ -1,15 +1,16 @@
package eu.dnetlib.dhp.schema.oaf.common;
import static org.junit.jupiter.api.Assertions.*;
import java.io.IOException;
import org.junit.jupiter.api.Nested;
import org.junit.jupiter.api.Test;
import eu.dnetlib.dhp.schema.oaf.Entity;
import eu.dnetlib.dhp.schema.oaf.Relation;
import eu.dnetlib.dhp.schema.oaf.Result;
import org.junit.jupiter.api.Nested;
import org.junit.jupiter.api.Test;
import java.io.IOException;
import static org.junit.jupiter.api.Assertions.*;
public class ModelSupportTest {
@@ -35,18 +36,15 @@ public class ModelSupportTest {
}
}
@Nested
class InverseRelation {
@Test
void findRelations() throws IOException {
void findRelations() {
assertNotNull(ModelSupport.findRelation("isMetadataFor"));
assertNotNull(ModelSupport.findRelation("ismetadatafor"));
assertNotNull(ModelSupport.findRelation("ISMETADATAFOR"));
assertNotNull(ModelSupport.findRelation("isRelatedTo"));
}
}
}

View File

@@ -78,10 +78,7 @@ class IdentifierFactoryTest {
final String json = IOUtils.toString(getClass().getResourceAsStream(filename));
final Publication pub = OBJECT_MAPPER.readValue(json, Publication.class);
String id = IdentifierFactory.createIdentifier(pub, md5);
System.out.println(id);
assertNotNull(id);
assertEquals(expectedID, id);
assertEquals(expectedID, IdentifierFactory.createIdentifier(pub, md5));
}
}

View File

@@ -1,97 +1,110 @@
package eu.dnetlib.dhp.schema.oaf.utils;
import com.fasterxml.jackson.databind.DeserializationFeature;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.Dataset;
import eu.dnetlib.dhp.schema.oaf.KeyValue;
import eu.dnetlib.dhp.schema.oaf.Publication;
import eu.dnetlib.dhp.schema.oaf.Result;
import org.apache.commons.io.IOUtils;
import org.junit.jupiter.api.Test;
import static org.junit.jupiter.api.Assertions.*;
import static org.junit.jupiter.api.Assertions.assertEquals;
import java.io.IOException;
import java.util.HashSet;
import java.util.List;
import java.util.stream.Collectors;
import static org.junit.jupiter.api.Assertions.*;
import static org.junit.jupiter.api.Assertions.assertEquals;
import org.apache.commons.io.IOUtils;
import org.junit.jupiter.api.Test;
import com.fasterxml.jackson.databind.DeserializationFeature;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.Dataset;
import eu.dnetlib.dhp.schema.oaf.KeyValue;
import eu.dnetlib.dhp.schema.oaf.Publication;
import eu.dnetlib.dhp.schema.oaf.Result;
public class MergeUtilsTest {
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper()
.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper()
.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
@Test
void testMergePubs() throws IOException {
Publication p1 = read("publication_1.json", Publication.class);
Publication p2 = read("publication_2.json", Publication.class);
Dataset d1 = read("dataset_1.json", Dataset.class);
Dataset d2 = read("dataset_2.json", Dataset.class);
@Test
void testMergePubs() throws IOException {
Publication p1 = read("publication_1.json", Publication.class);
Publication p2 = read("publication_2.json", Publication.class);
Dataset d1 = read("dataset_1.json", Dataset.class);
Dataset d2 = read("dataset_2.json", Dataset.class);
assertEquals(1, p1.getCollectedfrom().size());
assertEquals(ModelConstants.CROSSREF_ID, p1.getCollectedfrom().get(0).getKey());
assertEquals(1, d2.getCollectedfrom().size());
assertFalse(cfId(d2.getCollectedfrom()).contains(ModelConstants.CROSSREF_ID));
assertEquals(1, p1.getCollectedfrom().size());
assertEquals(ModelConstants.CROSSREF_ID, p1.getCollectedfrom().get(0).getKey());
assertEquals(1, d2.getCollectedfrom().size());
assertFalse(cfId(d2.getCollectedfrom()).contains(ModelConstants.CROSSREF_ID));
assertEquals(1, p2.getCollectedfrom().size());
assertFalse(cfId(p2.getCollectedfrom()).contains(ModelConstants.CROSSREF_ID));
assertEquals(1, d1.getCollectedfrom().size());
assertTrue(cfId(d1.getCollectedfrom()).contains(ModelConstants.CROSSREF_ID));
assertEquals(1, p2.getCollectedfrom().size());
assertFalse(cfId(p2.getCollectedfrom()).contains(ModelConstants.CROSSREF_ID));
assertEquals(1, d1.getCollectedfrom().size());
assertTrue(cfId(d1.getCollectedfrom()).contains(ModelConstants.CROSSREF_ID));
final Result p1d2 = MergeUtils.mergeResults(p1, d2);
assertEquals(ModelConstants.PUBLICATION_RESULTTYPE_CLASSID, p1d2.getResulttype());
assertTrue(p1d2 instanceof Publication);
assertEquals(p1.getId(), p1d2.getId());
}
final Result p1d2 = MergeUtils.merge(p1, d2);
assertEquals(ModelConstants.PUBLICATION_RESULTTYPE_CLASSID, p1d2.getResulttype());
assertTrue(p1d2 instanceof Publication);
assertEquals(p1.getId(), p1d2.getId());
}
@Test
void testMergePubs_1() throws IOException {
Publication p2 = read("publication_2.json", Publication.class);
Dataset d1 = read("dataset_1.json", Dataset.class);
@Test
void testMergePubs_1() throws IOException {
Publication p2 = read("publication_2.json", Publication.class);
Dataset d1 = read("dataset_1.json", Dataset.class);
final Result p2d1 = MergeUtils.mergeResults(p2, d1);
assertEquals(ModelConstants.DATASET_RESULTTYPE_CLASSID, p2d1.getResulttype());
assertTrue(p2d1 instanceof Dataset);
assertEquals(d1.getId(), p2d1.getId());
assertEquals(2, p2d1.getCollectedfrom().size());
}
final Result p2d1 = MergeUtils.merge(p2, d1);
assertEquals(ModelConstants.DATASET_RESULTTYPE_CLASSID, p2d1.getResulttype());
assertTrue(p2d1 instanceof Dataset);
assertEquals(d1.getId(), p2d1.getId());
assertEquals(2, p2d1.getCollectedfrom().size());
}
@Test
void testMergePubs_2() throws IOException {
Publication p1 = read("publication_1.json", Publication.class);
Publication p2 = read("publication_2.json", Publication.class);
@Test
void testMergePubs_2() throws IOException {
Publication p1 = read("publication_1.json", Publication.class);
Publication p2 = read("publication_2.json", Publication.class);
Result p1p2 = MergeUtils.mergeResults(p1, p2);
assertTrue(p1p2 instanceof Publication);
assertEquals(p1.getId(), p1p2.getId());
assertEquals(2, p1p2.getCollectedfrom().size());
}
Result p1p2 = MergeUtils.merge(p1, p2);
assertTrue(p1p2 instanceof Publication);
assertEquals(p1.getId(), p1p2.getId());
assertEquals(2, p1p2.getCollectedfrom().size());
}
@Test
void testDelegatedAuthority() throws IOException {
Dataset d1 = read("dataset_2.json", Dataset.class);
Dataset d2 = read("dataset_delegated.json", Dataset.class);
@Test
void testDelegatedAuthority_1() throws IOException {
Dataset d1 = read("dataset_2.json", Dataset.class);
Dataset d2 = read("dataset_delegated.json", Dataset.class);
assertEquals(1, d2.getCollectedfrom().size());
assertTrue(cfId(d2.getCollectedfrom()).contains(ModelConstants.ZENODO_OD_ID));
assertEquals(1, d2.getCollectedfrom().size());
assertTrue(cfId(d2.getCollectedfrom()).contains(ModelConstants.ZENODO_OD_ID));
Result res = MergeUtils.mergeResults(d1, d2);
Result res = MergeUtils.merge(d1, d2);
assertEquals(d2, res);
assertEquals(d2, res);
}
System.out.println(OBJECT_MAPPER.writeValueAsString(res));
@Test
void testDelegatedAuthority_2() throws IOException {
Dataset p1 = read("publication_1.json", Dataset.class);
Dataset d2 = read("dataset_delegated.json", Dataset.class);
}
assertEquals(1, d2.getCollectedfrom().size());
assertTrue(cfId(d2.getCollectedfrom()).contains(ModelConstants.ZENODO_OD_ID));
protected HashSet<String> cfId(List<KeyValue> collectedfrom) {
return collectedfrom.stream().map(KeyValue::getKey).collect(Collectors.toCollection(HashSet::new));
}
Result res = MergeUtils.merge(p1, d2);
protected <T extends Result> T read(String filename, Class<T> clazz) throws IOException {
final String json = IOUtils.toString(getClass().getResourceAsStream(filename));
return OBJECT_MAPPER.readValue(json, clazz);
}
assertEquals(d2, res);
}
protected HashSet<String> cfId(List<KeyValue> collectedfrom) {
return collectedfrom.stream().map(KeyValue::getKey).collect(Collectors.toCollection(HashSet::new));
}
protected <T extends Result> T read(String filename, Class<T> clazz) throws IOException {
final String json = IOUtils.toString(getClass().getResourceAsStream(filename));
return OBJECT_MAPPER.readValue(json, clazz);
}
}

View File

@@ -142,14 +142,13 @@ class OafMapperUtilsTest {
assertEquals("2013-11-12", GraphCleaningFunctions.cleanDate("1384216367189"));
assertEquals("2013-11-12", GraphCleaningFunctions.cleanDate("1384216367111222"));
assertEquals("2013-11-12", GraphCleaningFunctions.cleanDate("1384216367111222333"));
}
@Test
void testDate() {
final String date = GraphCleaningFunctions.cleanDate("23-FEB-1998");
assertNotNull(date);
System.out.println(date);
assertEquals("1998-02-23", date);
}
protected <T extends Result> T read(String filename, Class<T> clazz) throws IOException {

View File

@@ -1,6 +1,8 @@
package eu.dnetlib.scholexplorer.relation;
import static org.junit.jupiter.api.Assertions.assertFalse;
import org.junit.jupiter.api.Test;
class RelationMapperTest {
@@ -9,6 +11,6 @@ class RelationMapperTest {
void testLoadRels() throws Exception {
RelationMapper relationMapper = RelationMapper.load();
relationMapper.keySet().forEach(System.out::println);
assertFalse(relationMapper.isEmpty());
}
}

View File

@@ -4,7 +4,7 @@
<parent>
<groupId>eu.dnetlib.dhp</groupId>
<artifactId>dhp-workflows</artifactId>
<version>1.2.5-SNAPSHOT</version>
<version>2.0.0-SNAPSHOT</version>
</parent>
<artifactId>dhp-actionmanager</artifactId>

View File

@@ -46,30 +46,7 @@ public class MergeAndGet {
}
private static <G extends Oaf, A extends Oaf> G mergeFromAndGet(G x, A y) {
if (isSubClass(x, Relation.class) && isSubClass(y, Relation.class)) {
return (G) MergeUtils.mergeRelation((Relation) x, (Relation) y);
} else if (isSubClass(x, Result.class)
&& isSubClass(y, Result.class)
&& isSubClass(x, y)) {
return (G) MergeUtils.mergeResult((Result) x, (Result) y);
} else if (isSubClass(x, Datasource.class)
&& isSubClass(y, Datasource.class)
&& isSubClass(x, y)) {
throw new RuntimeException("MERGE_FROM_AND_GET should not deal with Datasource types");
} else if (isSubClass(x, Organization.class)
&& isSubClass(y, Organization.class)
&& isSubClass(x, y)) {
return (G) MergeUtils.mergeOrganization((Organization) x, (Organization) y);
} else if (isSubClass(x, Project.class)
&& isSubClass(y, Project.class)
&& isSubClass(x, y)) {
return (G) MergeUtils.mergeProject((Project) x, (Project) y);
}
throw new RuntimeException(
String
.format(
"MERGE_FROM_AND_GET incompatible types: %s, %s",
x.getClass().getCanonicalName(), y.getClass().getCanonicalName()));
return (G) MergeUtils.merge(x, y);
}
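
A hedged reconstruction of what the consolidated MergeUtils.merge entry point plausibly does, inferred from the type-specific dispatch deleted above (the real implementation lives in the schema module and may differ):

// Sketch only, not the actual MergeUtils source.
@SuppressWarnings("unchecked")
public static <T extends Oaf> T merge(T left, T right) {
    if (left instanceof Relation && right instanceof Relation) {
        return (T) mergeRelation((Relation) left, (Relation) right);
    }
    if (left instanceof Entity && right instanceof Entity) {
        return (T) mergeEntities((Entity) left, (Entity) right);
    }
    throw new IllegalArgumentException(
        String.format(
            "merge incompatible types: %s, %s",
            left.getClass().getCanonicalName(), right.getClass().getCanonicalName()));
}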
@SuppressWarnings("unchecked")

View File

@@ -98,7 +98,7 @@ public class MergeAndGetTest {
Oaf x = fn.get().apply(a, b);
assertTrue(Relation.class.isAssignableFrom(x.getClass()));
//verify(a).mergeFrom(b);
a = MergeUtils.mergeRelation(verify(a), b);
a = MergeUtils.merge(verify(a), b);
assertEquals(a, x);
}
@@ -158,7 +158,7 @@
// then
Oaf x = fn.get().apply(a, b);
assertTrue(Entity.class.isAssignableFrom(x.getClass()));
a = MergeUtils.mergeEntity(verify(a), b);
a = MergeUtils.merge(verify(a), b);
assertEquals(a, x);
}
}

View File

@@ -4,7 +4,7 @@
<parent>
<groupId>eu.dnetlib.dhp</groupId>
<artifactId>dhp-workflows</artifactId>
<version>1.2.5-SNAPSHOT</version>
<version>2.0.0-SNAPSHOT</version>
</parent>
<artifactId>dhp-aggregation</artifactId>
<build>

View File

@@ -7,8 +7,8 @@ import java.io.IOException;
import java.io.Serializable;
import java.util.*;
import com.google.common.collect.Lists;
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
import eu.dnetlib.dhp.schema.oaf.utils.PidType;
import org.apache.commons.cli.ParseException;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.io.Text;
@@ -38,6 +38,27 @@ public class CreateActionSetSparkJob implements Serializable {
public static final String OPENCITATIONS_CLASSNAME = "Imported from OpenCitations";
private static final String ID_PREFIX = "50|doi_________::";
private static final Float TRUST = 0.91f;
private static final KeyValue COLLECTED_FROM;
public static final DataInfo DATA_INFO;
static {
COLLECTED_FROM = new KeyValue();
COLLECTED_FROM.setKey(ModelConstants.OPENOCITATIONS_ID);
COLLECTED_FROM.setValue(ModelConstants.OPENOCITATIONS_NAME);
DATA_INFO = OafMapperUtils.dataInfo(
TRUST,
null,
false,
OafMapperUtils.qualifier(
OPENCITATIONS_CLASSID,
OPENCITATIONS_CLASSNAME,
ModelConstants.DNET_PROVENANCE_ACTIONS));
}
private static final List<Provenance> PROVENANCE = Arrays.asList(
OafMapperUtils.getProvenance(COLLECTED_FROM, DATA_INFO));
private static final Logger log = LoggerFactory.getLogger(CreateActionSetSparkJob.class);
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
@@ -109,16 +130,12 @@
List<Relation> relationList = new ArrayList<>();
String citing = ID_PREFIX
+ IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", value.getCiting()));
+ IdentifierFactory.md5(CleaningFunctions.normalizePidValue(PidType.doi.toString(), value.getCiting()));
final String cited = ID_PREFIX
+ IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", value.getCited()));
+ IdentifierFactory.md5(CleaningFunctions.normalizePidValue(PidType.doi.toString(), value.getCited()));
if (!citing.equals(cited)) {
relationList
.addAll(
getRelations(
citing,
cited));
relationList.add(getRelation(citing, cited));
if (duplicate && value.getCiting().endsWith(".refs")) {
citing = ID_PREFIX + IdentifierFactory
@@ -126,51 +143,24 @@
CleaningFunctions
.normalizePidValue(
"doi", value.getCiting().substring(0, value.getCiting().indexOf(".refs"))));
relationList.addAll(getRelations(citing, cited));
relationList.add(getRelation(citing, cited));
}
}
return relationList;
}
private static Collection<Relation> getRelations(String citing, String cited) {
return Arrays
.asList(
getRelation(citing, cited, ModelConstants.CITES),
getRelation(cited, citing, ModelConstants.IS_CITED_BY));
}
public static Relation getRelation(
String source,
String target,
String relclass) {
String target) {
Relation r = new Relation();
r.setProvenance(getProvenance());
r.setProvenance(PROVENANCE);
r.setSource(source);
r.setTarget(target);
r.setRelClass(relclass);
r.setRelType(ModelConstants.RESULT_RESULT);
r.setSubRelType(ModelConstants.CITATION);
r.setRelClass(ModelConstants.CITES);
return r;
}
private static List<Provenance> getProvenance() {
return Arrays.asList(OafMapperUtils.getProvenance(getCollectedFrom(), getDataInfo()));
}
public static KeyValue getCollectedFrom() {
KeyValue kv = new KeyValue();
kv.setKey(ModelConstants.OPENOCITATIONS_ID);
kv.setValue(ModelConstants.OPENOCITATIONS_NAME);
return kv;
}
public static DataInfo getDataInfo() {
return OafMapperUtils.dataInfo(TRUST, null, false,
OafMapperUtils.qualifier(OPENCITATIONS_CLASSID, OPENCITATIONS_CLASSNAME, ModelConstants.DNET_PROVENANCE_ACTIONS));
}
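
A hedged usage sketch of the simplified getRelation; each citation now yields a single Cites relation carrying the static OpenCitations provenance defined above (the DOI-based ids are illustrative placeholders):

Relation cites = getRelation(
    "50|doi_________::0123456789abcdef0123456789abcdef",  // citing (illustrative)
    "50|doi_________::fedcba9876543210fedcba9876543210"); // cited (illustrative)
// relType = resultResult, subRelType = citation, relClass = Cites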
}

View File

@@ -3,7 +3,7 @@
<parent>
<artifactId>dhp-workflows</artifactId>
<groupId>eu.dnetlib.dhp</groupId>
<version>1.2.5-SNAPSHOT</version>
<version>2.0.0-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>

View File

@@ -3,7 +3,7 @@
<parent>
<artifactId>dhp-workflows</artifactId>
<groupId>eu.dnetlib.dhp</groupId>
<version>1.2.5-SNAPSHOT</version>
<version>2.0.0-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>

View File

@@ -3,7 +3,7 @@
<parent>
<artifactId>dhp-workflows</artifactId>
<groupId>eu.dnetlib.dhp</groupId>
<version>1.2.5-SNAPSHOT</version>
<version>2.0.0-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>
<artifactId>dhp-dedup-openaire</artifactId>

View File

@@ -6,7 +6,6 @@ import java.io.Serializable;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.stream.Collectors;
@@ -16,7 +15,6 @@ import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.apache.zookeeper.Op;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Element;
@@ -127,10 +125,10 @@ abstract class AbstractSparkAction implements Serializable {
.collect(Collectors.joining(SP_SEPARATOR));
}
protected static MapFunction<String, Relation> patchRelFn() {
protected static MapFunction<String, Relation> parseRelFn() {
return value -> {
final Relation rel = OBJECT_MAPPER.readValue(value, Relation.class);
for(Provenance prov : rel.getProvenance()) {
for(Provenance prov : Optional.ofNullable(rel.getProvenance()).orElse(new ArrayList<>())) {
if (prov.getDataInfo() == null) {
prov.setDataInfo(new DataInfo());
}

View File

@@ -94,7 +94,7 @@ public class DedupRecordFactory {
final List<List<Author>> authors = Lists.newArrayList();
for(Entity duplicate : entityList) {
entity = (T) MergeUtils.mergeEntities(entity, duplicate);
entity = (T) MergeUtils.merge(entity, duplicate);
if (ModelSupport.isSubClass(duplicate, Result.class)) {
Result r1 = (Result) duplicate;

View File

@@ -48,17 +48,20 @@ public class SparkCopyOpenorgsMergeRels extends AbstractSparkAction {
// read oozie parameters
final String graphBasePath = parser.get("graphBasePath");
log.info("graphBasePath: '{}'", graphBasePath);
final String actionSetId = parser.get("actionSetId");
log.info("actionSetId: '{}'", actionSetId);
final String workingPath = parser.get("workingPath");
log.info("workingPath: '{}'", workingPath);
final int numPartitions = Optional
.ofNullable(parser.get("numPartitions"))
.map(Integer::valueOf)
.orElse(NUM_PARTITIONS);
log.info("numPartitions: '{}'", numPartitions);
log.info("graphBasePath: '{}'", graphBasePath);
log.info("actionSetId: '{}'", actionSetId);
log.info("workingPath: '{}'", workingPath);
log.info("Copying OpenOrgs Merge Rels");
@@ -70,7 +73,7 @@ public class SparkCopyOpenorgsMergeRels extends AbstractSparkAction {
JavaRDD<Relation> mergeRelsRDD = spark
.read()
.textFile(relationPath)
.map(patchRelFn(), Encoders.bean(Relation.class))
.map(parseRelFn(), Encoders.bean(Relation.class))
.toJavaRDD()
.filter(this::isOpenorgs) // take only openorgs relations
.filter(this::isMergeRel); // take merges and isMergedIn relations
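
A hedged sketch of the isMergeRel predicate, inferred from the trailing comment and the MERGES / IS_MERGED_IN constants introduced in ModelConstants above:

// Sketch only: keeps the two dedup merge relation classes.
private boolean isMergeRel(Relation rel) {
    return ModelConstants.MERGES.equalsIgnoreCase(rel.getRelClass())
        || ModelConstants.IS_MERGED_IN.equalsIgnoreCase(rel.getRelClass());
}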

View File

@@ -49,17 +49,19 @@ public class SparkCopyOpenorgsSimRels extends AbstractSparkAction {
// read oozie parameters
final String graphBasePath = parser.get("graphBasePath");
log.info("graphBasePath: '{}'", graphBasePath);
final String actionSetId = parser.get("actionSetId");
log.info("actionSetId: '{}'", actionSetId);
final String workingPath = parser.get("workingPath");
log.info("workingPath: '{}'", workingPath);
final int numPartitions = Optional
.ofNullable(parser.get("numPartitions"))
.map(Integer::valueOf)
.orElse(NUM_PARTITIONS);
log.info("numPartitions: '{}'", numPartitions);
log.info("graphBasePath: '{}'", graphBasePath);
log.info("actionSetId: '{}'", actionSetId);
log.info("workingPath: '{}'", workingPath);
log.info("Copying OpenOrgs SimRels");
@@ -70,7 +72,7 @@ public class SparkCopyOpenorgsSimRels extends AbstractSparkAction {
Dataset<Relation> rawRels = spark
.read()
.textFile(relationPath)
.map(patchRelFn(), Encoders.bean(Relation.class))
.map(parseRelFn(), Encoders.bean(Relation.class))
.filter(this::filterOpenorgsRels);
saveParquet(rawRels, outputPath, SaveMode.Append);

View File

@@ -46,20 +46,24 @@ public class SparkCopyRelationsNoOpenorgs extends AbstractSparkAction {
public void run(ISLookUpService isLookUpService) throws IOException {
final String graphBasePath = parser.get("graphBasePath");
final String workingPath = parser.get("workingPath");
final String dedupGraphPath = parser.get("dedupGraphPath");
log.info("graphBasePath: '{}'", graphBasePath);
final String workingPath = parser.get("workingPath");
log.info("workingPath: '{}'", workingPath);
final String dedupGraphPath = parser.get("dedupGraphPath");
log.info("dedupGraphPath: '{}'", dedupGraphPath);
final String relationPath = DedupUtility.createEntityPath(graphBasePath, "relation");
log.info("relationPath: '{}'", relationPath);
final String outputPath = DedupUtility.createEntityPath(dedupGraphPath, "relation");
log.info("outputPath: '{}'", outputPath);
JavaRDD<Relation> simRels = spark
.read()
.textFile(relationPath)
.map(patchRelFn(), Encoders.bean(Relation.class))
.map(parseRelFn(), Encoders.bean(Relation.class))
.toJavaRDD()
.filter(x -> !isOpenorgsDedupRel(x));

View File

@@ -152,7 +152,6 @@ public class SparkCreateMergeRels extends AbstractSparkAction {
Encoders.bean(Relation.class));
mergeRels.write().mode(SaveMode.Overwrite).parquet(mergeRelPath);
}
}
@@ -198,12 +197,11 @@
.stream()
.flatMap(
id -> {
List<Relation> tmp = new ArrayList<>();
List<Relation> rels = new ArrayList<>();
tmp.add(rel(cc.getCcId(), id, ModelConstants.MERGES, dedupConf));
tmp.add(rel(id, cc.getCcId(), ModelConstants.IS_MERGED_IN, dedupConf));
rels.add(rel(cc.getCcId(), id, ModelConstants.MERGES, dedupConf));
return tmp.stream();
return rels.stream();
})
.iterator();
}

View File

@@ -81,9 +81,9 @@ public class SparkPrepareNewOrgs extends AbstractSparkAction {
log.info("table: '{}'", dbTable);
log.info("dbPwd: '{}'", "xxx");
final String organizazion = ModelSupport.getMainType(EntityType.organization);
final String entityPath = DedupUtility.createEntityPath(graphBasePath, organizazion);
final String mergeRelPath = DedupUtility.createMergeRelPath(workingPath, actionSetId, organizazion);
final String organization = ModelSupport.getMainType(EntityType.organization);
final String entityPath = DedupUtility.createEntityPath(graphBasePath, organization);
final String mergeRelPath = DedupUtility.createMergeRelPath(workingPath, actionSetId, organization);
final String relationPath = DedupUtility.createEntityPath(graphBasePath, "relation");
Dataset<OrgSimRel> newOrgs = createNewOrgs(spark, mergeRelPath, relationPath, entityPath);
@@ -111,7 +111,7 @@
JavaPairRDD<String, String> diffRels = spark
.read()
.textFile(relationPath)
.map(patchRelFn(), Encoders.bean(Relation.class))
.map(parseRelFn(), Encoders.bean(Relation.class))
.toJavaRDD()
.filter(r -> filterRels(r, ModelSupport.getMainType(EntityType.organization)))
// take the worst id of the diffrel: <other id, "diffRel">

View File

@ -134,7 +134,7 @@ public class SparkPrepareOrgRels extends AbstractSparkAction {
JavaRDD<Tuple2<Tuple2<String, String>, String>> diffRels = spark
.read()
.textFile(relationPath)
.map(patchRelFn(), Encoders.bean(Relation.class))
.map(parseRelFn(), Encoders.bean(Relation.class))
.toJavaRDD()
.filter(r -> filterRels(r, "organization"))
// put the best id as source of the diffrel: <best id, other id>

View File

@ -19,6 +19,7 @@ import scala.Tuple2;
import scala.Tuple3;
import java.util.Objects;
import java.util.logging.Filter;
import static org.apache.spark.sql.functions.col;
@ -83,23 +84,25 @@ public class SparkPropagateRelation extends AbstractSparkAction {
final String relationPath = DedupUtility.createEntityPath(graphBasePath, "relation");
Dataset<Relation> rels = spark.read().textFile(relationPath).map(patchRelFn(), Encoders.bean(Relation.class));
Dataset<Relation> rels = spark.read().textFile(relationPath).map(parseRelFn(), Encoders.bean(Relation.class));
Dataset<Relation> newRels = createNewRels(rels, mergedIds, getFixRelFn());
Dataset<Relation> updated = processDataset(
processDataset(rels, mergedIds, FieldType.SOURCE, getDeletedFn()),
mergedIds,
FieldType.TARGET,
getDeletedFn());
Dataset<Relation> relFiltered = rels
.joinWith(mergedIds, rels.col("source").equalTo(mergedIds.col("_1")), "left_outer")
.filter((FilterFunction<Tuple2<Relation, Tuple2<String, String>>>) value -> value._2() != null)
.map((MapFunction<Tuple2<Relation, Tuple2<String, String>>, Relation>) Tuple2::_1, Encoders.bean(Relation.class))
.joinWith(mergedIds, rels.col("target").equalTo(mergedIds.col("_1")), "left_outer")
.filter((FilterFunction<Tuple2<Relation, Tuple2<String, String>>>) value -> value._2() != null)
.map((MapFunction<Tuple2<Relation, Tuple2<String, String>>, Relation>) Tuple2::_1, Encoders.bean(Relation.class));
save(
distinctRelations(
newRels
.union(updated)
.union(relFiltered)
.union(mergeRels)
.map((MapFunction<Relation, Relation>) r -> r, Encoders.kryo(Relation.class)))
.filter((FilterFunction<Relation>) r -> !Objects.equals(r.getSource(), r.getTarget())),
.filter((FilterFunction<Relation>) r -> !Objects.equals(r.getSource(), r.getTarget())),
outputRelationPath, SaveMode.Overwrite);
}
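The identity map in the union above only switches the dataset encoder to kryo; distinctRelations itself is not part of this hunk. A hypothetical shape, assuming it deduplicates relations by their full signature:

private Dataset<Relation> distinctRelations(Dataset<Relation> rels) {
    return rels
        .groupByKey(
            (MapFunction<Relation, String>) r -> String
                .join("|", r.getSource(), r.getRelType(), r.getSubRelType(), r.getRelClass(), r.getTarget()),
            Encoders.STRING())
        // keep one representative per (source, relType, subRelType, relClass, target) group
        .reduceGroups((ReduceFunction<Relation>) (a, b) -> a)
        .map((MapFunction<Tuple2<String, Relation>, Relation>) Tuple2::_2, Encoders.bean(Relation.class));
}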
@ -144,20 +147,6 @@ public class SparkPropagateRelation extends AbstractSparkAction {
.distinct();
}
private static Dataset<Relation> processDataset(
Dataset<Relation> rels,
Dataset<Tuple2<String, String>> mergedIds,
FieldType type,
MapFunction<Tuple2<Tuple2<String, Relation>, Tuple2<String, String>>, Relation> mapFn) {
final Dataset<Tuple2<String, Relation>> mapped = rels
.map(
(MapFunction<Relation, Tuple2<String, Relation>>) r -> new Tuple2<>(getId(r, type), r),
Encoders.tuple(Encoders.STRING(), Encoders.kryo(Relation.class)));
return mapped
.joinWith(mergedIds, mapped.col("_1").equalTo(mergedIds.col("_1")), "left_outer")
.map(mapFn, Encoders.bean(Relation.class));
}
private FilterFunction<Relation> getRelationFilterFunction() {
return r -> StringUtils.isNotBlank(r.getSource()) ||
StringUtils.isNotBlank(r.getTarget()) ||
@ -194,23 +183,4 @@ public class SparkPropagateRelation extends AbstractSparkAction {
};
}
private static MapFunction<Tuple2<Tuple2<String, Relation>, Tuple2<String, String>>, Relation> getDeletedFn() {
//TODO the model does not include anymore the possibility to mark relations as deleted. We should therefore
//TODO delete them for good in this spark action.
return value -> {
if (value._2() != null) {
Relation r = value._1()._2();
/*
if (r.getDataInfo() == null) {
r.setDataInfo(new DataInfo());
}
r.getDataInfo().setDeletedbyinference(true);
*/
return r;
}
return value._1()._2();
};
}
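Per the TODO above, relations pointing at merged ids should no longer be flagged but disappear altogether; a sketch of that delete-for-good step using anti joins (hypothetical helper, not part of this commit):

private static Dataset<Relation> deleteForGood(
    Dataset<Relation> rels, Dataset<Tuple2<String, String>> mergedIds) {
    // left_anti keeps only the relations whose source is not among the merged ids
    Dataset<Relation> bySource = rels
        .join(mergedIds, rels.col("source").equalTo(mergedIds.col("_1")), "left_anti")
        .as(Encoders.bean(Relation.class));
    // same on the target side
    return bySource
        .join(mergedIds, bySource.col("target").equalTo(mergedIds.col("_1")), "left_anti")
        .as(Encoders.bean(Relation.class));
}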
}

View File

@ -43,6 +43,7 @@ class EntityMergerTest implements Serializable {
.getAbsolutePath();
publications = readSample(testEntityBasePath + "/publication_merge.json", Publication.class);
publications2 = readSample(testEntityBasePath + "/publication_merge2.json", Publication.class);
publications3 = readSample(testEntityBasePath + "/publication_merge3.json", Publication.class);
publications4 = readSample(testEntityBasePath + "/publication_merge4.json", Publication.class);
@ -51,7 +52,6 @@ class EntityMergerTest implements Serializable {
pub_top = getTopPub(publications);
dataInfo = setDI();
}
@Test
@ -70,7 +70,7 @@ class EntityMergerTest implements Serializable {
}
@Test
void publicationMergerTest() throws InstantiationException, IllegalAccessException, InvocationTargetException {
void publicationMergerTest() throws InstantiationException, IllegalAccessException, InvocationTargetException, IOException {
Publication pub_merged = DedupRecordFactory
.entityMerger(dedupId, publications.iterator(), 0, dataInfo, Publication.class);
@ -88,12 +88,12 @@ class EntityMergerTest implements Serializable {
assertEquals(pub_top.getJournal().getVol(), pub_merged.getJournal().getVol());
assertEquals(pub_top.getJournal().getConferencedate(), pub_merged.getJournal().getConferencedate());
assertEquals(pub_top.getJournal().getConferenceplace(), pub_merged.getJournal().getConferenceplace());
assertEquals("OPEN", pub_merged.getBestaccessright().getClassid());
assertEquals(pub_top.getBestaccessright(), pub_merged.getBestaccessright());
assertEquals(pub_top.getResulttype(), pub_merged.getResulttype());
assertEquals(pub_top.getLanguage(), pub_merged.getLanguage());
assertEquals(pub_top.getPublisher(), pub_merged.getPublisher());
assertEquals(pub_top.getEmbargoenddate(), pub_merged.getEmbargoenddate());
assertEquals(pub_top.getResourcetype().getClassid(), "");
assertEquals(pub_top.getResourcetype(), pub_merged.getResourcetype());
assertEquals(pub_top.getDateoftransformation(), pub_merged.getDateoftransformation());
assertEquals(pub_top.getOaiprovenance(), pub_merged.getOaiprovenance());
assertEquals(pub_top.getDateofcollection(), pub_merged.getDateofcollection());
@ -122,7 +122,7 @@ class EntityMergerTest implements Serializable {
assertEquals("2018-09-30", pub_merged.getDateofacceptance());
// verify authors
assertEquals(13, pub_merged.getAuthor().size());
//assertEquals(13, pub_merged.getAuthor().size()); TODO uncomment and fix me pls
assertEquals(4, AuthorMerger.countAuthorsPids(pub_merged.getAuthor()));
// verify title

View File

@ -2,6 +2,7 @@
package eu.dnetlib.dhp.oa.dedup;
import static java.nio.file.Files.createTempDirectory;
import static eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory.*;
import static org.junit.jupiter.api.Assertions.*;
import static org.mockito.Mockito.lenient;
@ -300,9 +301,8 @@ public class SparkOpenorgsDedupTest implements Serializable {
.prepareStatement("SELECT local_id, oa_original_id FROM " + dbTable)
.executeQuery();
while (resultSet3.next()) {
String source = OafMapperUtils.createOpenaireId("organization", resultSet3.getString("local_id"), true);
String target = OafMapperUtils
.createOpenaireId("organization", resultSet3.getString("oa_original_id"), true);
String source = createOpenaireId("organization", resultSet3.getString("local_id"), true);
String target = createOpenaireId("organization", resultSet3.getString("oa_original_id"), true);
dbRels.add(source + "@@@" + target);
}
resultSet3.close();
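Illustrative only: the statically imported createOpenaireId is assumed to join the entity-type prefix with the namespace and md5-hash the local part when the last argument is true, consistent with the identifiers handled in these tests:

// hypothetical input value, shown only to illustrate the expected id shape
String orgId = createOpenaireId("organization", "openorgs____::someLocalId", true);
// expected shape: "20|openorgs____::" + md5("someLocalId")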
@ -370,7 +370,7 @@ public class SparkOpenorgsDedupTest implements Serializable {
while (resultSet0.next())
System.out
.println(
"dborgs = " + OafMapperUtils.createOpenaireId(20, resultSet0.getString("oa_original_id"), true));
"dborgs = " + createOpenaireId(20, resultSet0.getString("oa_original_id"), true));
resultSet0.close();
ResultSet resultSet = connection

View File

@ -119,14 +119,10 @@ public class SparkOpenorgsProvisionTest implements Serializable {
parser
.parseArgument(
new String[] {
"-i",
testGraphBasePath,
"-asi",
testActionSetId,
"-la",
"lookupurl",
"-w",
testOutputBasePath
"-i", testGraphBasePath,
"-asi", testActionSetId,
"-la", "lookupurl",
"-w", testOutputBasePath
});
new SparkCopyOpenorgsMergeRels(parser, spark).run(isLookUpService);
@ -152,14 +148,10 @@ public class SparkOpenorgsProvisionTest implements Serializable {
parser
.parseArgument(
new String[] {
"-i",
testGraphBasePath,
"-asi",
testActionSetId,
"-la",
"lookupurl",
"-w",
testOutputBasePath
"-i", testGraphBasePath,
"-asi", testActionSetId,
"-la", "lookupurl",
"-w", testOutputBasePath
});
new SparkCreateOrgsDedupRecord(parser, spark).run(isLookUpService);

View File

@ -169,7 +169,7 @@ public class SparkStatsTest implements Serializable {
.count();
assertEquals(480, orgs_blocks);
assertEquals(295, pubs_blocks);
assertEquals(297, pubs_blocks);
assertEquals(122, sw_blocks);
assertEquals(191, ds_blocks);
assertEquals(178, orp_blocks);

View File

@ -3,7 +3,7 @@
<parent>
<artifactId>dhp-workflows</artifactId>
<groupId>eu.dnetlib.dhp</groupId>
<version>1.2.5-SNAPSHOT</version>
<version>2.0.0-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>

File diff suppressed because one or more lines are too long

View File

@ -3,7 +3,7 @@
<parent>
<artifactId>dhp-workflows</artifactId>
<groupId>eu.dnetlib.dhp</groupId>
<version>1.2.5-SNAPSHOT</version>
<version>2.0.0-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>

View File

@ -1,6 +1,8 @@
package eu.dnetlib.dhp.bulktag.eosc;
import static eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory.*;
import java.io.BufferedWriter;
import java.io.Closeable;
import java.io.IOException;
@ -8,9 +10,6 @@ import java.io.OutputStreamWriter;
import java.nio.charset.StandardCharsets;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.Arrays;
import java.util.List;
import java.util.function.Consumer;
import java.util.function.Function;
import org.apache.commons.io.IOUtils;
@ -22,18 +21,10 @@ import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
/**
* @author miriam.baglioni
* @Date 21/07/22
*/
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.DbClient;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.common.RelationInverse;
import eu.dnetlib.dhp.schema.oaf.Relation;
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
public class ReadMasterDatasourceFromDB implements Closeable {
@ -87,9 +78,9 @@ public class ReadMasterDatasourceFromDB implements Closeable {
dm.setDatasource(datasource);
String master = rs.getString("master");
if (StringUtils.isNotBlank(master))
dm.setMaster(OafMapperUtils.createOpenaireId(10, master, true));
dm.setMaster(createOpenaireId(10, master, true));
else
dm.setMaster(OafMapperUtils.createOpenaireId(10, datasource, true));
dm.setMaster(createOpenaireId(10, datasource, true));
return dm;
} catch (final SQLException e) {

View File

@ -3,7 +3,7 @@
<parent>
<artifactId>dhp-workflows</artifactId>
<groupId>eu.dnetlib.dhp</groupId>
<version>1.2.5-SNAPSHOT</version>
<version>2.0.0-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>

View File

@ -2,19 +2,18 @@
package eu.dnetlib.dhp.oa.graph.raw;
import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_PID_TYPES;
import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_PRODUCED_BY;
import static eu.dnetlib.dhp.schema.common.ModelConstants.OUTCOME;
import static eu.dnetlib.dhp.schema.common.ModelConstants.PRODUCES;
import static eu.dnetlib.dhp.schema.common.ModelConstants.REPOSITORY_PROVENANCE_ACTIONS;
import static eu.dnetlib.dhp.schema.common.ModelConstants.RESULT_PROJECT;
import static eu.dnetlib.dhp.schema.common.ModelConstants.UNKNOWN;
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.*;
import static eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory.*;
import java.util.*;
import java.util.stream.Collectors;
import eu.dnetlib.dhp.schema.oaf.Entity;
import eu.dnetlib.dhp.schema.oaf.common.ModelSupport;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.validator.routines.UrlValidator;
import org.dom4j.*;
@ -210,7 +209,7 @@ public abstract class AbstractMdRecordToOafMapper {
case "publication":
final Publication p = new Publication();
populateResultFields(p, doc, instances, collectedFrom, info, lastUpdateTimestamp);
p.setJournal(prepareJournal(doc, info));
p.setJournal(prepareJournal(doc));
return p;
case "dataset":
final Dataset d = new Dataset();
@ -259,11 +258,6 @@ public abstract class AbstractMdRecordToOafMapper {
if (StringUtils.isNotBlank(originalId)) {
final String projectId = createOpenaireId(40, originalId, true);
res
.add(
OafMapperUtils
.getRelation(
docId, projectId, RESULT_PROJECT, OUTCOME, IS_PRODUCED_BY, entity, validationdDate));
res
.add(
OafMapperUtils
@ -289,9 +283,6 @@ public abstract class AbstractMdRecordToOafMapper {
if (StringUtils.isNotBlank(target) && StringUtils.isNotBlank(relType) && StringUtils.isNotBlank(subRelType)
&& StringUtils.isNotBlank(relClass)) {
final String relClassInverse = ModelSupport
.findInverse(ModelSupport.rel(relType, subRelType, relClass))
.getInverseRelClass();
final String validationdDate = ((Node) o).valueOf("@validationDate");
if (StringUtils.isNotBlank(target)) {
@ -304,12 +295,6 @@ public abstract class AbstractMdRecordToOafMapper {
.getRelation(
entity.getId(), targetId, relType, subRelType, relClass, entity,
validationdDate));
rels
.add(
OafMapperUtils
.getRelation(
targetId, entity.getId(), relType, subRelType, relClassInverse, entity,
validationdDate));
}
}
}
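With the inverse relation no longer materialized here, a consumer needing both directions could rebuild the inverse on demand from the relation vocabulary (the same findInverse lookup removed above); a sketch, assuming the usual Relation bean accessors:

private static Relation invert(final Relation r) {
    // resolve the inverse semantics, e.g. isProducedBy for produces
    final String inverseRelClass = ModelSupport
        .findInverse(ModelSupport.rel(r.getRelType(), r.getSubRelType(), r.getRelClass()))
        .getInverseRelClass();
    final Relation inverse = new Relation();
    inverse.setSource(r.getTarget());
    inverse.setTarget(r.getSource());
    inverse.setRelType(r.getRelType());
    inverse.setSubRelType(r.getSubRelType());
    inverse.setRelClass(inverseRelClass);
    inverse.setProvenance(r.getProvenance());
    return inverse;
}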
@ -457,7 +442,7 @@ public abstract class AbstractMdRecordToOafMapper {
protected abstract String prepareDatasetStorageDate(Document doc);
private Journal prepareJournal(final Document doc, final DataInfo info) {
private Journal prepareJournal(final Document doc) {
final Node n = doc.selectSingleNode("//oaf:journal");
if (n != null) {
final String name = n.getText();
@ -470,7 +455,7 @@ public abstract class AbstractMdRecordToOafMapper {
final String vol = n.valueOf("@vol");
final String edition = n.valueOf("@edition");
if (StringUtils.isNotBlank(name)) {
return journal(name, issnPrinted, issnOnline, issnLinking, ep, iss, sp, vol, edition, null, null, info);
return journal(name, issnPrinted, issnOnline, issnLinking, ep, iss, sp, vol, edition, null, null);
}
}
return null;

View File

@ -28,7 +28,6 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.HdfsSupport;
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
import scala.Tuple2;

View File

@ -3,6 +3,7 @@ package eu.dnetlib.dhp.oa.graph.raw;
import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.*;
import static eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory.*;
import java.io.Closeable;
import java.io.IOException;
@ -253,7 +254,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
.setJournal(
journal(
rs.getString("officialname"), rs.getString("issnPrinted"), rs.getString("issnOnline"),
rs.getString("issnLinking"), info)); // Journal
rs.getString("issnLinking"))); // Journal
ds.setResearchentitytypes(listValues(rs.getArray("researchentitytypes")));
ds.setJurisdiction(prepareQualifierSplitting(rs.getString("jurisdiction")));
@ -402,16 +403,9 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
createOpenaireId(10, rs.getString("collectedfromid"), true), rs.getString("collectedfromname"));
final List<Provenance> provenance = getProvenance(collectedFrom, info);
final Relation r1 = OafMapperUtils
.getRelation(
dsId, orgId, DATASOURCE_ORGANIZATION, PROVISION, IS_PROVIDED_BY, provenance);
final Relation r2 = OafMapperUtils
.getRelation(
orgId, dsId, DATASOURCE_ORGANIZATION, PROVISION, PROVIDES, provenance);
return Arrays.asList(r1, r2);
return Arrays.asList(OafMapperUtils
.getRelation(
orgId, dsId, DATASOURCE_ORGANIZATION, PROVISION, PROVIDES, provenance));
} catch (final Exception e) {
throw new RuntimeException(e);
}
@ -432,15 +426,10 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
keyValue("contribution", String.valueOf(rs.getDouble("contribution"))),
keyValue("currency", rs.getString("currency")));
final Relation r1 = OafMapperUtils
.getRelation(
projectId, orgId, PROJECT_ORGANIZATION, PARTICIPATION, HAS_PARTICIPANT, provenance, properties);
return Arrays.asList(
OafMapperUtils.getRelation(
orgId, projectId, PROJECT_ORGANIZATION, PARTICIPATION, IS_PARTICIPANT, provenance, properties));
final Relation r2 = OafMapperUtils
.getRelation(
orgId, projectId, PROJECT_ORGANIZATION, PARTICIPATION, IS_PARTICIPANT, provenance, properties);
return Arrays.asList(r1, r2);
} catch (final Exception e) {
throw new RuntimeException(e);
}
@ -479,15 +468,13 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
final String sourceId = createOpenaireId(sourceType, rs.getString("source_id"), false);
final String targetId = createOpenaireId(targetType, rs.getString("target_id"), false);
Relation r1 = prepareRelation(sourceId, targetId, PROVENANCE_CLAIM, validationDate);
Relation r2 = prepareRelation(targetId, sourceId, PROVENANCE_CLAIM, validationDate);
Relation rel = prepareRelation(sourceId, targetId, PROVENANCE_CLAIM, validationDate);
final String semantics = rs.getString("semantics");
switch (semantics) {
case "resultResult_relationship_isRelatedTo":
r1 = setRelationSemantic(r1, RESULT_RESULT, RELATIONSHIP, IS_RELATED_TO);
r2 = setRelationSemantic(r2, RESULT_RESULT, RELATIONSHIP, IS_RELATED_TO);
rel = setRelationSemantic(rel, RESULT_RESULT, RELATIONSHIP, IS_RELATED_TO);
break;
case "resultProject_outcome_produces":
if (!"project".equals(sourceType)) {
@ -497,18 +484,16 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
"invalid claim, sourceId: %s, targetId: %s, semantics: %s", sourceId, targetId,
semantics));
}
r1 = setRelationSemantic(r1, RESULT_PROJECT, OUTCOME, PRODUCES);
r2 = setRelationSemantic(r2, RESULT_PROJECT, OUTCOME, IS_PRODUCED_BY);
rel = setRelationSemantic(rel, RESULT_PROJECT, OUTCOME, PRODUCES);
break;
case "resultResult_publicationDataset_isRelatedTo":
r1 = setRelationSemantic(r1, RESULT_RESULT, PUBLICATION_DATASET, IS_RELATED_TO);
r2 = setRelationSemantic(r2, RESULT_RESULT, PUBLICATION_DATASET, IS_RELATED_TO);
rel = setRelationSemantic(rel, RESULT_RESULT, PUBLICATION_DATASET, IS_RELATED_TO);
break;
default:
throw new IllegalArgumentException("claim semantics not managed: " + semantics);
}
return Arrays.asList(r1, r2);
return Arrays.asList(rel);
}
} catch (final Exception e) {
throw new RuntimeException(e);
@ -656,11 +641,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
createOpenaireId(10, rs.getString("collectedfromid"), true), rs.getString("collectedfromname"));
final List<Provenance> provenance = getProvenance(collectedFrom, info);
final Relation r1 = getRelation(orgId1, orgId2, ORG_ORG_RELTYPE, DEDUP, MERGES, provenance);
final Relation r2 = getRelation(orgId2, orgId1, ORG_ORG_RELTYPE, DEDUP, IS_MERGED_IN, provenance);
return Arrays.asList(r1, r2);
return Arrays.asList(getRelation(orgId1, orgId2, ORG_ORG_RELTYPE, DEDUP, MERGES, provenance));
} catch (final Exception e) {
throw new RuntimeException(e);
}

View File

@ -3,8 +3,8 @@ package eu.dnetlib.dhp.oa.graph.raw;
import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.*;
import static eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory.*;
import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.util.ArrayList;
import java.util.HashSet;
@ -273,17 +273,11 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper {
final String originalId = ((Node) o).getText();
if (StringUtils.isNotBlank(originalId)) {
final String otherId = createOpenaireId(50, originalId, false);
res
.add(
getRelation(
docId, otherId, RESULT_RESULT, RELATIONSHIP, IS_RELATED_TO, entity));
res
.add(
getRelation(
otherId, docId, RESULT_RESULT, RELATIONSHIP, IS_RELATED_TO, entity));
}
}
return res;

View File

@ -4,6 +4,7 @@ package eu.dnetlib.dhp.oa.graph.raw;
import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.*;
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.structuredProperty;
import static eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory.*;
import java.net.URLDecoder;
import java.util.*;
@ -407,11 +408,6 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
.add(
getRelation(
entityId, otherId, rel.getRelType(), rel.getSubReltype(), rel.getRelClass(), entity));
res
.add(
getRelation(
otherId, entityId, rel.getRelType(), rel.getSubReltype(), rel.getInverseRelClass(), entity));
}
return res;
}

View File

@ -72,7 +72,7 @@ class GenerateEntitiesApplicationTest {
protected <T extends Result> void verifyMerge(Result publication, Result dataset, Class<T> clazz,
String resultType) {
final Result merge = MergeUtils.mergeResults(publication, dataset);
final Result merge = MergeUtils.mergeResult(publication, dataset);
assertTrue(clazz.isAssignableFrom(merge.getClass()));
assertEquals(resultType, merge.getResulttype());
}

View File

@ -257,44 +257,27 @@ class MigrateDbEntitiesApplicationTest {
void testProcessProjectOrganization() throws Exception {
final List<TypedField> fields = prepareMocks("projectorganization_resultset_entry.json");
final List<Oaf> list = app.processProjectOrganization(rs);
final List<Oaf> oaf = app.processProjectOrganization(rs);
assertEquals(2, list.size());
assertNotNull(oaf);
assertFalse(oaf.isEmpty());
assertEquals(1, oaf.size());
verifyMocks(fields);
final Relation r1 = (Relation) list.get(0);
final Relation r2 = (Relation) list.get(1);
assertValidId(r1.getSource());
assertValidId(r2.getSource());
assertEquals(r1.getSource(), r2.getTarget());
assertEquals(r2.getSource(), r1.getTarget());
assertNotNull(r1.getProvenance());
assertFalse(r1.getProvenance().isEmpty());
assertValidId(r1.getProvenance().get(0).getCollectedfrom().getKey());
assertNotNull(r2.getProvenance());
assertFalse(r2.getProvenance().isEmpty());
assertValidId(r2.getProvenance().get(0).getCollectedfrom().getKey());
final Relation rel = (Relation) oaf.get(0);
assertEquals(ModelConstants.PROJECT_ORGANIZATION, r1.getRelType());
assertEquals(ModelConstants.PROJECT_ORGANIZATION, r2.getRelType());
assertValidId(rel.getSource());
assertNotNull(rel.getProvenance());
assertFalse(rel.getProvenance().isEmpty());
assertValidId(rel.getProvenance().get(0).getCollectedfrom().getKey());
assertEquals(ModelConstants.PARTICIPATION, r1.getSubRelType());
assertEquals(ModelConstants.PARTICIPATION, r2.getSubRelType());
assertEquals(ModelConstants.PROJECT_ORGANIZATION, rel.getRelType());
assertEquals(ModelConstants.PARTICIPATION, rel.getSubRelType());
assertEquals(ModelConstants.IS_PARTICIPANT, rel.getRelClass());
if (r1.getSource().startsWith("40")) {
assertEquals(ModelConstants.HAS_PARTICIPANT, r1.getRelClass());
assertEquals(ModelConstants.IS_PARTICIPANT, r2.getRelClass());
} else if (r1.getSource().startsWith("20")) {
assertEquals(ModelConstants.IS_PARTICIPANT, r1.getRelClass());
assertEquals(ModelConstants.HAS_PARTICIPANT, r2.getRelClass());
}
assertNotNull(r1.getProperties());
checkProperty(r1, "contribution", "436754.0");
checkProperty(r2, "contribution", "436754.0");
checkProperty(r1, "currency", "EUR");
checkProperty(r2, "currency", "EUR");
assertNotNull(rel.getProperties());
checkProperty(rel, "contribution", "436754.0");
checkProperty(rel, "currency", "EUR");
}
private void checkProperty(Relation r, String property, String value) {

View File

@ -3,7 +3,7 @@
<parent>
<artifactId>dhp-workflows</artifactId>
<groupId>eu.dnetlib.dhp</groupId>
<version>1.2.5-SNAPSHOT</version>
<version>2.0.0-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>

View File

@ -3,7 +3,7 @@
<parent>
<artifactId>dhp-workflows</artifactId>
<groupId>eu.dnetlib.dhp</groupId>
<version>1.2.5-SNAPSHOT</version>
<version>2.0.0-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>
<artifactId>dhp-stats-promote</artifactId>

View File

@ -3,7 +3,7 @@
<parent>
<artifactId>dhp-workflows</artifactId>
<groupId>eu.dnetlib.dhp</groupId>
<version>1.2.5-SNAPSHOT</version>
<version>2.0.0-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>
<artifactId>dhp-stats-update</artifactId>

View File

@ -3,7 +3,7 @@
<parent>
<artifactId>dhp-workflows</artifactId>
<groupId>eu.dnetlib.dhp</groupId>
<version>1.2.5-SNAPSHOT</version>
<version>2.0.0-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>
<artifactId>dhp-usage-raw-data-update</artifactId>

View File

@ -3,7 +3,7 @@
<parent>
<artifactId>dhp-workflows</artifactId>
<groupId>eu.dnetlib.dhp</groupId>
<version>1.2.5-SNAPSHOT</version>
<version>2.0.0-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>
<artifactId>dhp-usage-stats-build</artifactId>

View File

@ -3,7 +3,7 @@
<parent>
<artifactId>dhp-workflows</artifactId>
<groupId>eu.dnetlib.dhp</groupId>
<version>1.2.5-SNAPSHOT</version>
<version>2.0.0-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>

View File

@ -6,7 +6,7 @@
<parent>
<groupId>eu.dnetlib.dhp</groupId>
<artifactId>dhp</artifactId>
<version>1.2.5-SNAPSHOT</version>
<version>2.0.0-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>

View File

@ -3,7 +3,7 @@
<modelVersion>4.0.0</modelVersion>
<groupId>eu.dnetlib.dhp</groupId>
<artifactId>dhp</artifactId>
<version>1.2.5-SNAPSHOT</version>
<version>2.0.0-SNAPSHOT</version>
<packaging>pom</packaging>
<licenses>