wip: large refactoring

This commit is contained in:
Claudio Atzori 2023-02-09 12:32:28 +01:00
parent d9c9482a5b
commit 934c1846f8
70 changed files with 1537 additions and 1418 deletions

View File

@ -6,7 +6,7 @@
<parent>
<groupId>eu.dnetlib.dhp</groupId>
<artifactId>dhp-build</artifactId>
<version>1.2.5-SNAPSHOT</version>
<version>2.0.0-SNAPSHOT</version>
</parent>
<artifactId>dhp-build-assembly-resources</artifactId>

View File

@ -6,7 +6,7 @@
<parent>
<groupId>eu.dnetlib.dhp</groupId>
<artifactId>dhp-build</artifactId>
<version>1.2.5-SNAPSHOT</version>
<version>2.0.0-SNAPSHOT</version>
</parent>
<artifactId>dhp-build-properties-maven-plugin</artifactId>

View File

@ -4,7 +4,7 @@
<parent>
<groupId>eu.dnetlib.dhp</groupId>
<artifactId>dhp</artifactId>
<version>1.2.5-SNAPSHOT</version>
<version>2.0.0-SNAPSHOT</version>
</parent>
<artifactId>dhp-build</artifactId>
<packaging>pom</packaging>

View File

@ -5,7 +5,7 @@
<parent>
<groupId>eu.dnetlib.dhp</groupId>
<artifactId>dhp</artifactId>
<version>1.2.5-SNAPSHOT</version>
<version>2.0.0-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>

View File

@ -1,6 +1,8 @@
package eu.dnetlib.dhp.common.action;
import static eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory.*;
import java.io.BufferedWriter;
import java.io.IOException;
import java.io.OutputStreamWriter;
@ -19,7 +21,6 @@ import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.common.DbClient;
import eu.dnetlib.dhp.common.action.model.MasterDuplicate;
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
public class ReadDatasourceMasterDuplicateFromDB {
@ -59,8 +60,8 @@ public class ReadDatasourceMasterDuplicateFromDB {
final String masterId = rs.getString("masterId");
final String masterName = rs.getString("masterName");
md.setDuplicateId(OafMapperUtils.createOpenaireId(10, duplicateId, true));
md.setMasterId(OafMapperUtils.createOpenaireId(10, masterId, true));
md.setDuplicateId(createOpenaireId(10, duplicateId, true));
md.setMasterId(createOpenaireId(10, masterId, true));
md.setMasterName(masterName);
return md;

View File

@ -121,10 +121,12 @@ public class AuthorMerger {
}
public static String pidToComparableString(StructuredProperty pid) {
final String classid = pid.getQualifier().getClassid() != null ? pid.getQualifier().getClassid().toLowerCase()
: "";
return (pid.getQualifier() != null ? classid : "")
+ (pid.getValue() != null ? pid.getValue().toLowerCase() : "");
return pid.toComparableString();
/*
* final String classid = pid.getQualifier().getClassid() != null ?
* pid.getQualifier().getClassid().toLowerCase() : ""; return (pid.getQualifier() != null ? classid : "") +
* (pid.getValue() != null ? pid.getValue().toLowerCase() : "");
*/
}
public static int countAuthorsPids(List<Author> authors) {

View File

@ -10,8 +10,6 @@ import java.util.Objects;
import java.util.Optional;
import java.util.stream.Collectors;
import eu.dnetlib.dhp.schema.oaf.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.utils.MergeUtils;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.spark.SparkConf;
@ -33,6 +31,8 @@ import com.jayway.jsonpath.Option;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.HdfsSupport;
import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.schema.oaf.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.utils.MergeUtils;
import scala.Tuple2;
/**
@ -120,7 +120,7 @@ public class GroupEntitiesSparkJob {
private Entity mergeAndGet(Entity b, Entity a) {
if (Objects.nonNull(a) && Objects.nonNull(b)) {
return MergeUtils.mergeEntities(b, a);
return MergeUtils.merge(b, a);
}
return Objects.isNull(a) ? b : a;
}

View File

@ -0,0 +1,252 @@
package eu.dnetlib.dhp.schema.common;
import eu.dnetlib.dhp.schema.oaf.AccessRight;
import eu.dnetlib.dhp.schema.oaf.KeyValue;
import eu.dnetlib.dhp.schema.oaf.Qualifier;
public class ModelConstants {
private ModelConstants() {}
public static final String ORCID = "orcid";
public static final String ORCID_PENDING = "orcid_pending";
public static final String ORCID_CLASSNAME = "Open Researcher and Contributor ID";
public static final String ORCID_DS = ORCID.toUpperCase();
public static final String CROSSREF_ID = "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2";
public static final String CROSSREF_NAME = "Crossref";
public static final String DATACITE_ID = "10|openaire____::9e3be59865b2c1c335d32dae2fe7b254";
public static final String ZENODO_OD_ID = "10|opendoar____::358aee4cc897452c00244351e4d91f69";
public static final String ZENODO_R3_ID = "10|re3data_____::7b0ad08687b2c960d5aeef06f811d5e6";
public static final String EUROPE_PUBMED_CENTRAL_ID = "10|opendoar____::8b6dd7db9af49e67306feb59a8bdc52c";
public static final String PUBMED_CENTRAL_ID = "10|opendoar____::eda80a3d5b344bc40f3bc04f65b7a357";
public static final String ARXIV_ID = "10|opendoar____::6f4922f45568161a8cdf4ad2299f6d23";
public static final String ROHUB_ID = "10|fairsharing_::1b69ebedb522700034547abc5652ffac";
public static final String OPENORGS_NAME = "OpenOrgs Database";
public static final String OPENOCITATIONS_NAME = "OpenCitations";
public static final String OPENOCITATIONS_ID = "10|openaire____::c06df618c5de1c786535ccf3f8b7b059";
public static final String OPEN_APC_NAME = "OpenAPC Global Initiative";
public static final String OPEN_APC_ID = "10|apc_________::e2b1600b229fc30663c8a1f662debddf";
// VOCABULARY VALUE
public static final String ACCESS_RIGHT_OPEN = "OPEN";
public static final String ACCESS_RIGHT_EMBARGO = "EMBARGO";
public static final String ACCESS_RIGHT_CLOSED = "CLOSED";
public static final String DNET_SUBJECT_KEYWORD = "keyword";
public static final String DNET_SUBJECT_FOS_CLASSID = "FOS";
public static final String DNET_SUBJECT_FOS_CLASSNAME = "Fields of Science and Technology classification";
public static final String DNET_SUBJECT_TYPOLOGIES = "dnet:subject_classification_typologies";
public static final String DNET_RESULT_TYPOLOGIES = "dnet:result_typologies";
public static final String DNET_PUBLICATION_RESOURCE = "dnet:publication_resource";
public static final String DNET_ACCESS_MODES = "dnet:access_modes";
public static final String DNET_LANGUAGES = "dnet:languages";
public static final String DNET_PID_TYPES = "dnet:pid_types";
public static final String DNET_DATACITE_DATE = "dnet:dataCite_date";
public static final String DNET_DATACITE_TITLE = "dnet:dataCite_title";
public static final String DNET_DATA_CITE_RESOURCE = "dnet:dataCite_resource";
public static final String DNET_PROVENANCE_ACTIONS = "dnet:provenanceActions";
public static final String DNET_COUNTRY_TYPE = "dnet:countries";
public static final String DNET_REVIEW_LEVELS = "dnet:review_levels";
public static final String DNET_PROGRAMMING_LANGUAGES = "dnet:programming_languages";
public static final String DNET_EXTERNAL_REFERENCE_TYPE = "dnet:externalReference_typologies";
public static final String DNET_RELATION_RELTYPE = "dnet:relation_relType";
public static final String DNET_RELATION_SUBRELTYPE = "dnet:relation_subRelType";
public static final String DNET_RELATION_RELCLASS = "dnet:relation_relClass";
public static final String PEER_REVIEWED_CLASSNAME = "nonPeerReviewed";
public static final String NON_PEER_REVIEWED_CLASSNAME = "nonPeerReviewed";
public static final String PEER_REVIEWED_CLASSID = "0001";
public static final String NON_PEER_REVIEWED_CLASSID = "0002";
public static final String SYSIMPORT_CROSSWALK_REPOSITORY = "sysimport:crosswalk:repository";
public static final String SYSIMPORT_CROSSWALK_ENTITYREGISTRY = "sysimport:crosswalk:entityregistry";
public static final String SYSIMPORT_ACTIONSET = "sysimport:actionset";
public static final String SYSIMPORT_ORCID_NO_DOI = "sysimport:actionset:orcidworks-no-doi";
public static final String USER_CLAIM = "user:claim";
public static final String HARVESTED = "Harvested";
public static final String PROVENANCE_DEDUP = "sysimport:dedup";
public static final String PROVENANCE_ENRICH = "sysimport:enrich";
public static final Qualifier PROVENANCE_ACTION_SET_QUALIFIER = qualifier(
SYSIMPORT_ACTIONSET, SYSIMPORT_ACTIONSET, DNET_PROVENANCE_ACTIONS);
public static final String DATASET_RESULTTYPE_CLASSID = "dataset";
public static final String PUBLICATION_RESULTTYPE_CLASSID = "publication";
public static final String SOFTWARE_RESULTTYPE_CLASSID = "software";
public static final String ORP_RESULTTYPE_CLASSID = "other";
public static final String RESULT_RESULT = "resultResult"; // relType
/**
* @deprecated Use {@link ModelConstants#RELATIONSHIP} instead.
*/
@Deprecated
public static final String PUBLICATION_DATASET = "publicationDataset"; // subreltype
public static final String SUPPLEMENT = "supplement"; // subreltype
public static final String IS_SUPPLEMENT_TO = "IsSupplementTo";
public static final String IS_SUPPLEMENTED_BY = "IsSupplementedBy";
public static final String PART = "part"; // subreltype
public static final String IS_PART_OF = "IsPartOf";
public static final String HAS_PART = "HasPart";
public static final String RELATIONSHIP = "relationship"; // subreltype
public static final String IS_RELATED_TO = "IsRelatedTo";
public static final String IS_IDENTICAL_TO = "IsIdenticalTo";
public static final String REFERENCES = "References";
public static final String IS_REFERENCED_BY = "IsReferencedBy";
public static final String CONTINUES = "Continues";
public static final String IS_CONTINUED_BY = "IsContinuedBy";
public static final String DOCUMENTS = "Documents";
public static final String IS_DOCUMENTED_BY = "IsDocumentedBy";
public static final String IS_SOURCE_OF = "IsSourceOf";
public static final String IS_DERIVED_FROM = "IsDerivedFrom";
public static final String COMPILES = "Compiles";
public static final String IS_COMPILED_BY = "IsCompiledBy";
public static final String DESCRIBES = "Describes";
public static final String IS_DESCRIBED_BY = "IsDescribedBy";
public static final String IS_METADATA_FOR = "IsMetadataFor";
public static final String IS_METADATA_OF = "IsMetadataOf";
public static final String HAS_ASSOCIATION_WITH = "HasAssociationWith";
public static final String IS_REQUIRED_BY = "IsRequiredBy";
public static final String REQUIRES = "Requires";
public static final String CITATION = "citation"; // subreltype
public static final String CITES = "Cites";
public static final String IS_CITED_BY = "IsCitedBy";
public static final String REVIEW = "review"; // subreltype
public static final String REVIEWS = "Reviews";
public static final String IS_REVIEWED_BY = "IsReviewedBy";
public static final String VERSION = "version"; // subreltype
public static final String IS_VERSION_OF = "IsVersionOf";
public static final String HAS_VERSION = "HasVersion";
public static final String IS_PREVIOUS_VERSION_OF = "IsPreviousVersionOf";
public static final String IS_NEW_VERSION_OF = "IsNewVersionOf";
public static final String IS_VARIANT_FORM_OF = "IsVariantFormOf";
public static final String IS_ORIGINAL_FORM_OF = "IsOriginalFormOf";
public static final String IS_OBSOLETED_BY = "IsObsoletedBy";
public static final String OBSOLETES = "Obsoletes";
public static final String RESULT_PROJECT = "resultProject"; // relType
public static final String OUTCOME = "outcome"; // subreltype
public static final String IS_PRODUCED_BY = "isProducedBy";
public static final String PRODUCES = "produces";
public static final String DATASOURCE_ORGANIZATION = "datasourceOrganization"; // relType
public static final String PROVISION = "provision"; // subreltype
public static final String IS_PROVIDED_BY = "isProvidedBy";
public static final String PROVIDES = "provides";
public static final String PROJECT_ORGANIZATION = "projectOrganization"; // relType
public static final String PARTICIPATION = "participation"; // subreltype
public static final String HAS_PARTICIPANT = "hasParticipant";
public static final String IS_PARTICIPANT = "isParticipant";
public static final String RESULT_ORGANIZATION = "resultOrganization"; // relType
public static final String AFFILIATION = "affiliation"; // subreltype
public static final String IS_AUTHOR_INSTITUTION_OF = "isAuthorInstitutionOf";
public static final String HAS_AUTHOR_INSTITUTION = "hasAuthorInstitution";
public static final String ORG_ORG_RELTYPE = "organizationOrganization"; // relType
public static final String IS_PARENT_OF = "IsParentOf";
public static final String IS_CHILD_OF = "IsChildOf";
public static final String DEDUP = "dedup"; // subreltype
public static final String MERGES = "merges";
public static final String IS_MERGED_IN = "isMergedIn";
public static final String SIMILARITY = "similarity"; // subreltype
public static final String IS_SIMILAR_TO = "isSimilarTo";
public static final String IS_AMONG_TOP_N_SIMILAR_DOCS = "IsAmongTopNSimilarDocuments";
public static final String HAS_AMONG_TOP_N_SIMILAR_DOCS = "HasAmongTopNSimilarDocuments";
public static final String IS_DIFFERENT_FROM = "isDifferentFrom";
public static final String UNKNOWN = "UNKNOWN";
public static final String NOT_AVAILABLE = "not available";
public static final Qualifier PUBLICATION_DEFAULT_RESULTTYPE = qualifier(
PUBLICATION_RESULTTYPE_CLASSID, PUBLICATION_RESULTTYPE_CLASSID,
DNET_RESULT_TYPOLOGIES);
public static final Qualifier DATASET_DEFAULT_RESULTTYPE = qualifier(
DATASET_RESULTTYPE_CLASSID, DATASET_RESULTTYPE_CLASSID,
DNET_RESULT_TYPOLOGIES);
public static final Qualifier SOFTWARE_DEFAULT_RESULTTYPE = qualifier(
SOFTWARE_RESULTTYPE_CLASSID, SOFTWARE_RESULTTYPE_CLASSID,
DNET_RESULT_TYPOLOGIES);
public static final Qualifier ORP_DEFAULT_RESULTTYPE = qualifier(
ORP_RESULTTYPE_CLASSID, ORP_RESULTTYPE_CLASSID,
DNET_RESULT_TYPOLOGIES);
public static final Qualifier REPOSITORY_PROVENANCE_ACTIONS = qualifier(
SYSIMPORT_CROSSWALK_REPOSITORY, SYSIMPORT_CROSSWALK_REPOSITORY,
DNET_PROVENANCE_ACTIONS);
public static final Qualifier ENTITYREGISTRY_PROVENANCE_ACTION = qualifier(
SYSIMPORT_CROSSWALK_ENTITYREGISTRY, SYSIMPORT_CROSSWALK_ENTITYREGISTRY,
DNET_PROVENANCE_ACTIONS);
public static final String UNKNOWN_REPOSITORY_ORIGINALID = "openaire____::1256f046-bf1f-4afc-8b47-d0b147148b18";
public static final KeyValue UNKNOWN_REPOSITORY = keyValue(
"10|openaire____::55045bd2a65019fd8e6741a755395c8c", "Unknown Repository");
public static final Qualifier UNKNOWN_COUNTRY = qualifier(UNKNOWN, "Unknown", DNET_COUNTRY_TYPE);
public static final Qualifier MAIN_TITLE_QUALIFIER = qualifier(
"main title", "main title", DNET_DATACITE_TITLE);
public static final Qualifier ALTERNATIVE_TITLE_QUALIFIER = qualifier(
"alternative title", "alternative title", DNET_DATACITE_TITLE);
private static final Qualifier SUBTITLE_QUALIFIER = qualifier("subtitle", "subtitle", DNET_DATACITE_TITLE);
public static final AccessRight OPEN_ACCESS_RIGHT() {
final AccessRight result = new AccessRight();
result.setClassid(ACCESS_RIGHT_OPEN);
result.setClassid(ACCESS_RIGHT_OPEN);
result.setSchemeid(ModelConstants.DNET_ACCESS_MODES);
return result;
}
private static Qualifier qualifier(
final String classid,
final String classname,
final String schemeid) {
final Qualifier q = new Qualifier();
q.setClassid(classid);
q.setClassname(classname);
q.setSchemeid(schemeid);
return q;
}
private static KeyValue keyValue(final String key, final String value) {
final KeyValue kv = new KeyValue();
kv.setKey(key);
kv.setValue(value);
return kv;
}
}

View File

@ -1,10 +1,10 @@
package eu.dnetlib.dhp.schema.oaf.common;
import eu.dnetlib.dhp.schema.oaf.Qualifier;
import java.util.Comparator;
import eu.dnetlib.dhp.schema.oaf.Qualifier;
public class AccessRightComparator<T extends Qualifier> implements Comparator<T> {
@Override

View File

@ -1,12 +1,8 @@
package eu.dnetlib.dhp.schema.oaf.common;
import com.github.sisyphsu.dateparser.DateParserUtils;
import com.google.common.collect.Maps;
import eu.dnetlib.dhp.schema.oaf.*;
import org.apache.commons.codec.binary.Hex;
import org.apache.commons.lang3.StringUtils;
import static com.google.common.base.Preconditions.checkArgument;
import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
@ -18,8 +14,13 @@ import java.util.Objects;
import java.util.Optional;
import java.util.function.Function;
import static com.google.common.base.Preconditions.checkArgument;
import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
import org.apache.commons.codec.binary.Hex;
import org.apache.commons.lang3.StringUtils;
import com.github.sisyphsu.dateparser.DateParserUtils;
import com.google.common.collect.Maps;
import eu.dnetlib.dhp.schema.oaf.*;
/** Oaf model utility methods. */
public class ModelSupport {
@ -129,7 +130,6 @@ public class ModelSupport {
set(relationInverseMap, RESULT_RESULT, RELATIONSHIP, HAS_ASSOCIATION_WITH, HAS_ASSOCIATION_WITH);
set(relationInverseMap, RESULT_RESULT, RELATIONSHIP, IS_REQUIRED_BY, REQUIRES);
set(relationInverseMap, RESULT_RESULT, VERSION, IS_PREVIOUS_VERSION_OF, IS_NEW_VERSION_OF);
set(relationInverseMap, RESULT_RESULT, VERSION, IS_VARIANT_FORM_OF, IS_ORIGINAL_FORM_OF);
set(relationInverseMap, RESULT_RESULT, VERSION, IS_OBSOLETED_BY, OBSOLETES);
@ -138,7 +138,8 @@ public class ModelSupport {
set(relationInverseMap, RESULT_RESULT, REVIEW, IS_REVIEWED_BY, REVIEWS);
}
private static void set(Map<String, RelationInverse> relationInverseMap, String relType, String subRelType, String relClass, String inverseRelClass) {
private static void set(Map<String, RelationInverse> relationInverseMap, String relType, String subRelType,
String relClass, String inverseRelClass) {
relationInverseMap
.put(
rel(relType, subRelType, relClass), new RelationInverse()
@ -178,7 +179,8 @@ public class ModelSupport {
* @return
*/
public static RelationInverse findRelation(final String relationName) {
return relationInverseMap.values()
return relationInverseMap
.values()
.stream()
.filter(r -> relationName.equalsIgnoreCase(r.getRelClass()))
.findFirst()
@ -207,6 +209,10 @@ public class ModelSupport {
return idPrefixMap.get(clazz);
}
public static <X extends Oaf, Y extends Oaf, Z extends Oaf> Boolean sameClass(X left, Y right, Class<Z> superClazz) {
return isSubClass(left, superClazz) && isSubClass(right, superClazz);
}
/**
* Checks subclass-superclass relationship.
*

View File

@ -1,11 +1,11 @@
package eu.dnetlib.dhp.schema.oaf.common;
import java.util.Comparator;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.Qualifier;
import java.util.Comparator;
public class RefereedComparator implements Comparator<Qualifier> {
@Override

View File

@ -1,16 +1,7 @@
package eu.dnetlib.dhp.schema.oaf.utils;
import com.github.sisyphsu.dateparser.DateParserUtils;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.schema.oaf.common.ModelSupport;
import me.xuender.unidecode.Unidecode;
import org.apache.commons.lang3.StringUtils;
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.getProvenance;
import java.time.LocalDate;
import java.time.ZoneId;
@ -21,7 +12,17 @@ import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.getProvenance;
import org.apache.commons.lang3.StringUtils;
import com.github.sisyphsu.dateparser.DateParserUtils;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.schema.oaf.common.ModelSupport;
import me.xuender.unidecode.Unidecode;
public class GraphCleaningFunctions extends CleaningFunctions {

View File

@ -12,7 +12,6 @@ import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import eu.dnetlib.dhp.schema.oaf.common.ModelSupport;
import org.apache.commons.codec.binary.Hex;
import org.apache.commons.lang3.StringUtils;
@ -20,6 +19,7 @@ import com.google.common.collect.HashBiMap;
import com.google.common.collect.Maps;
import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.schema.oaf.common.ModelSupport;
/**
* Factory class for OpenAIRE identifiers in the Graph
@ -268,7 +268,7 @@ public class IdentifierFactory implements Serializable {
.append(ID_PREFIX_SEPARATOR)
.append(createPrefix(pidType))
.append(ID_SEPARATOR)
.append(md5 ? md5(pidValue) : pidValue)
.append(md5 ? ModelSupport.md5(pidValue) : pidValue)
.toString();
}
@ -281,13 +281,36 @@ public class IdentifierFactory implements Serializable {
return prefix.substring(0, ID_PREFIX_LEN);
}
public static String md5(final String s) {
try {
final MessageDigest md = MessageDigest.getInstance("MD5");
md.update(s.getBytes(StandardCharsets.UTF_8));
return new String(Hex.encodeHex(md.digest()));
} catch (final Exception e) {
public static String createOpenaireId(
final int prefix,
final String originalId,
final boolean to_md5) {
if (StringUtils.isBlank(originalId)) {
return null;
} else if (to_md5) {
final String nsPrefix = StringUtils.substringBefore(originalId, "::");
final String rest = StringUtils.substringAfter(originalId, "::");
return String.format("%s|%s::%s", prefix, nsPrefix, ModelSupport.md5(rest));
} else {
return String.format("%s|%s", prefix, originalId);
}
}
public static String createOpenaireId(
final String type,
final String originalId,
final boolean to_md5) {
switch (type) {
case "datasource":
return createOpenaireId(10, originalId, to_md5);
case "organization":
return createOpenaireId(20, originalId, to_md5);
case "person":
return createOpenaireId(30, originalId, to_md5);
case "project":
return createOpenaireId(40, originalId, to_md5);
default:
return createOpenaireId(50, originalId, to_md5);
}
}

View File

@ -1,68 +1,164 @@
package eu.dnetlib.dhp.schema.oaf.utils;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.schema.oaf.common.AccessRightComparator;
import eu.dnetlib.dhp.schema.oaf.common.ModelSupport;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.tuple.ImmutablePair;
import org.apache.commons.lang3.tuple.Pair;
import static com.google.common.base.Preconditions.checkArgument;
import static eu.dnetlib.dhp.schema.oaf.common.ModelSupport.isSubClass;
import static eu.dnetlib.dhp.schema.oaf.common.ModelSupport.sameClass;
import java.text.ParseException;
import java.util.*;
import java.util.stream.Collectors;
import static com.google.common.base.Preconditions.checkArgument;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.tuple.ImmutablePair;
import org.apache.commons.lang3.tuple.Pair;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.schema.oaf.common.AccessRightComparator;
import eu.dnetlib.dhp.schema.oaf.common.ModelSupport;
public class MergeUtils {
public static Oaf merge(final Oaf left, final Oaf right) {
if (ModelSupport.isSubClass(left, Entity.class)) {
return mergeEntities((Entity) left, (Entity) right);
} else if (ModelSupport.isSubClass(left, Relation.class)) {
return MergeUtils.mergeRelation((Relation) left, (Relation) right);
public static <T extends Oaf> T merge(final T left, final T right) {
if (sameClass(left, right, Entity.class)) {
return mergeEntities(left, right);
} else if (sameClass(left, right, Relation.class)) {
return mergeRelation(left, right);
} else {
throw new IllegalArgumentException("invalid Oaf type:" + left.getClass().getCanonicalName());
throw new RuntimeException(
String
.format(
"MERGE_FROM_AND_GET incompatible types: %s, %s",
left.getClass().getCanonicalName(), right.getClass().getCanonicalName()));
}
}
public static Entity mergeEntities(Entity original, Entity enrich) {
if (ModelSupport.isSubClass(original, Result.class)) {
return mergeResults((Result) original, (Result) enrich);
} else if (ModelSupport.isSubClass(original, Datasource.class)) {
private static <T extends Oaf> T mergeEntities(T left, T right) {
if (sameClass(left, right, Result.class)) {
if (!left.getClass().equals(right.getClass())) {
return mergeResultsOfDifferentTypes(left, right);
}
return mergeResult(left, right);
} else if (sameClass(left, right, Datasource.class)) {
// TODO
return original;
} else if (ModelSupport.isSubClass(original, Organization.class)) {
return mergeOrganization((Organization) original, (Organization) enrich);
} else if (ModelSupport.isSubClass(original, Project.class)) {
return mergeProject((Project) original, (Project) enrich);
return left;
} else if (sameClass(left, right, Organization.class)) {
return mergeOrganization(left, right);
} else if (sameClass(left, right, Project.class)) {
return mergeProject(left, right);
} else {
throw new IllegalArgumentException("invalid Entity subtype:" + original.getClass().getCanonicalName());
throw new RuntimeException(
String
.format(
"MERGE_FROM_AND_GET incompatible types: %s, %s",
left.getClass().getCanonicalName(), right.getClass().getCanonicalName()));
}
}
public static Result mergeResults(Result original, Result enrich) {
/**
* This method is used in the global result grouping phase. It checks if one of the two is from a delegated authority
* https://graph.openaire.eu/docs/data-model/pids-and-identifiers#delegated-authorities and in that case it prefers
* such version.
*
* Otherwise, it considers a resulttype priority order implemented in {@link ResultTypeComparator}
* and proceeds with the canonical property merging.
*
* @param left
* @param right
* @return
*/
private static <T extends Oaf> T mergeResultsOfDifferentTypes(T left, T right) {
final boolean leftFromDelegatedAuthority = isFromDelegatedAuthority(original);
final boolean rightFromDelegatedAuthority = isFromDelegatedAuthority(enrich);
final boolean leftFromDelegatedAuthority = isFromDelegatedAuthority((Result) left);
final boolean rightFromDelegatedAuthority = isFromDelegatedAuthority((Result) right);
if (leftFromDelegatedAuthority && !rightFromDelegatedAuthority) {
return original;
return left;
}
if (!leftFromDelegatedAuthority && rightFromDelegatedAuthority) {
return enrich;
return right;
}
if (new ResultTypeComparator().compare(original, enrich) < 0) {
return MergeUtils.mergeResult(original, enrich);
if (new ResultTypeComparator().compare((Result) left, (Result) right) < 0) {
return mergeResult(left, right);
} else {
return MergeUtils.mergeResult(enrich, original);
return mergeResult(right, left);
}
}
public static Result mergeResult(Result original, Result enrich) {
/**
* Internal utility that merges the common entity fields
*
* @param left
* @param right
* @return
* @param <T>
*/
private static <T extends Oaf> T mergeEntityFields(T left, T right) {
final Result mergedResult = (Result) mergeEntity(original, enrich);
final Entity enrich = (Entity) right;
final Entity mergedEntity = (Entity) left;
mergedEntity.setOriginalId(mergeLists(mergedEntity.getOriginalId(), enrich.getOriginalId()));
mergedEntity.setCollectedfrom(mergeLists(mergedEntity.getCollectedfrom(), enrich.getCollectedfrom()));
if (mergedEntity.getLastupdatetimestamp() == null && enrich.getLastupdatetimestamp() != null) {
mergedEntity.setLastupdatetimestamp(enrich.getLastupdatetimestamp());
} else if (mergedEntity.getLastupdatetimestamp() != null && enrich.getLastupdatetimestamp() != null) {
mergedEntity
.setLastupdatetimestamp(
Long.max(mergedEntity.getLastupdatetimestamp(), enrich.getLastupdatetimestamp()));
}
mergedEntity.setPid(mergeLists(mergedEntity.getPid(), enrich.getPid()));
final int trustCompareResult = compareTrust(mergedEntity, enrich);
if (enrich.getDateofcollection() != null && trustCompareResult < 0)
mergedEntity.setDateofcollection(enrich.getDateofcollection());
if (enrich.getDateoftransformation() != null && trustCompareResult < 0)
mergedEntity.setDateoftransformation(enrich.getDateoftransformation());
mergedEntity.setMeasures(mergeLists(mergedEntity.getMeasures(), enrich.getMeasures()));
mergedEntity.setExtraInfo(mergeLists(mergedEntity.getExtraInfo(), enrich.getExtraInfo()));
return (T) mergedEntity;
}
private static <T extends Oaf> T mergeRelation(T left, T right) {
Relation original = (Relation) left;
Relation enrich = (Relation) right;
checkArgument(Objects.equals(original.getSource(), enrich.getSource()), "source ids must be equal");
checkArgument(Objects.equals(original.getTarget(), enrich.getTarget()), "target ids must be equal");
checkArgument(Objects.equals(original.getRelType(), enrich.getRelType()), "relType(s) must be equal");
checkArgument(
Objects.equals(original.getSubRelType(), enrich.getSubRelType()), "subRelType(s) must be equal");
checkArgument(Objects.equals(original.getRelClass(), enrich.getRelClass()), "relClass(es) must be equal");
original.setProvenance(mergeLists(original.getProvenance(), enrich.getProvenance()));
original.setValidated(original.getValidated() || enrich.getValidated());
try {
original.setValidationDate(ModelSupport.oldest(original.getValidationDate(), enrich.getValidationDate()));
} catch (ParseException e) {
throw new IllegalArgumentException(String
.format(
"invalid validation date format in relation [s:%s, t:%s]: %s", original.getSource(),
original.getTarget(),
original.getValidationDate()));
}
return (T) original;
}
private static <T extends Oaf> T mergeResult(T left, T right) {
Result original = (Result) left;
Result enrich = (Result) right;
final Result mergedResult = mergeEntityFields(original, enrich);
if (StringUtils.isBlank(mergedResult.getProcessingchargeamount())) {
mergedResult.setProcessingchargeamount(enrich.getProcessingchargeamount());
@ -74,15 +170,18 @@ public class MergeUtils {
if (!isAnEnrichment(mergedResult) && !isAnEnrichment(enrich))
mergedResult.setInstance(mergeLists(mergedResult.getInstance(), enrich.getInstance()));
else {
final List<Instance> enrichmentInstances = isAnEnrichment(mergedResult) ? mergedResult.getInstance() : enrich.getInstance();
final List<Instance> enrichedInstances= isAnEnrichment(mergedResult) ? enrich.getInstance(): mergedResult.getInstance();
final List<Instance> enrichmentInstances = isAnEnrichment(mergedResult) ? mergedResult.getInstance()
: enrich.getInstance();
final List<Instance> enrichedInstances = isAnEnrichment(mergedResult) ? enrich.getInstance()
: mergedResult.getInstance();
if (isAnEnrichment(mergedResult))
mergedResult.setDataInfo(enrich.getDataInfo());
mergedResult.setInstance(enrichInstances(enrichedInstances, enrichmentInstances));
}
if (enrich.getBestaccessright() != null
&& new AccessRightComparator<>().compare(enrich.getBestaccessright(), mergedResult.getBestaccessright()) < 0)
&& new AccessRightComparator<>()
.compare(enrich.getBestaccessright(), mergedResult.getBestaccessright()) < 0)
mergedResult.setBestaccessright(enrich.getBestaccessright());
final int trustCompareResult = compareTrust(mergedResult, enrich);
@ -94,9 +193,7 @@ public class MergeUtils {
mergedResult.setLanguage(enrich.getLanguage());
if (Objects.nonNull(enrich.getDateofacceptance())) {
if (Objects.isNull(mergedResult.getDateofacceptance())) {
mergedResult.setDateofacceptance(enrich.getDateofacceptance());
} else if (trustCompareResult < 0) {
if (Objects.isNull(mergedResult.getDateofacceptance()) || trustCompareResult < 0) {
mergedResult.setDateofacceptance(enrich.getDateofacceptance());
}
}
@ -114,7 +211,8 @@ public class MergeUtils {
baseMainTitle = getMainTitle(mergedResult.getTitle());
if (baseMainTitle != null) {
final StructuredProperty p = baseMainTitle;
mergedResult.setTitle(mergedResult.getTitle().stream().filter(t -> t != p).collect(Collectors.toList()));
mergedResult
.setTitle(mergedResult.getTitle().stream().filter(t -> t != p).collect(Collectors.toList()));
}
}
@ -161,237 +259,350 @@ public class MergeUtils {
mergedResult.setContext(mergeLists(mergedResult.getContext(), enrich.getContext()));
mergedResult.setExternalReference(mergeLists(mergedResult.getExternalReference(), enrich.getExternalReference()));
mergedResult
.setExternalReference(mergeLists(mergedResult.getExternalReference(), enrich.getExternalReference()));
if (enrich.getOaiprovenance() != null && trustCompareResult < 0)
mergedResult.setOaiprovenance(enrich.getOaiprovenance());
return mergedResult;
if (isSubClass(mergedResult, Publication.class)) {
return (T) mergePublication(mergedResult, enrich);
}
if (isSubClass(mergedResult, Dataset.class)) {
return (T) mergeDataset(mergedResult, enrich);
}
if (isSubClass(mergedResult, OtherResearchProduct.class)) {
return (T) mergeORP(mergedResult, enrich);
}
if (isSubClass(mergedResult, Software.class)) {
return (T) mergeSoftware(mergedResult, enrich);
}
public static OtherResearchProduct mergeORP(OtherResearchProduct original, OtherResearchProduct enrich) {
final OtherResearchProduct mergedORP = (OtherResearchProduct) mergeResult(original, enrich);
mergeEntityDataInfo(original, enrich);
mergedORP.setContactperson(mergeLists(mergedORP.getContactperson(), enrich.getContactperson()));
mergedORP.setContactgroup(mergeLists(mergedORP.getContactgroup(), enrich.getContactgroup()));
mergedORP.setTool(mergeLists(mergedORP.getTool(), enrich.getTool()));
mergeEntityDataInfo(mergedORP, enrich);
return mergedORP;
return (T) mergedResult;
}
public static Software mergeSoftware(Software original, Software enrich) {
final Software mergedSoftware = (Software) mergeResult(original, enrich);
private static <T extends Oaf> T mergeORP(T left, T right) {
mergedSoftware.setDocumentationUrl(mergeLists(mergedSoftware.getDocumentationUrl(), enrich.getDocumentationUrl()));
final OtherResearchProduct original = (OtherResearchProduct) left;
final OtherResearchProduct enrich = (OtherResearchProduct) right;
mergedSoftware.setCodeRepositoryUrl(enrich.getCodeRepositoryUrl() != null && compareTrust(mergedSoftware,enrich) < 0
original.setContactperson(mergeLists(original.getContactperson(), enrich.getContactperson()));
original.setContactgroup(mergeLists(original.getContactgroup(), enrich.getContactgroup()));
original.setTool(mergeLists(original.getTool(), enrich.getTool()));
mergeEntityDataInfo(original, enrich);
return (T) original;
}
private static <T extends Oaf> T mergeSoftware(T left, T right) {
final Software original = (Software) left;
final Software enrich = (Software) right;
original
.setDocumentationUrl(mergeLists(original.getDocumentationUrl(), enrich.getDocumentationUrl()));
original
.setCodeRepositoryUrl(
enrich.getCodeRepositoryUrl() != null && compareTrust(original, enrich) < 0
? enrich.getCodeRepositoryUrl()
: mergedSoftware.getCodeRepositoryUrl());
: original.getCodeRepositoryUrl());
mergedSoftware.setProgrammingLanguage(enrich.getProgrammingLanguage() != null && compareTrust(mergedSoftware, enrich) < 0
original
.setProgrammingLanguage(
enrich.getProgrammingLanguage() != null && compareTrust(original, enrich) < 0
? enrich.getProgrammingLanguage()
: mergedSoftware.getProgrammingLanguage());
: original.getProgrammingLanguage());
mergeEntityDataInfo(mergedSoftware, enrich);
return mergedSoftware;
mergeEntityDataInfo(original, enrich);
return (T) original;
}
public static Dataset mergeDataset(Dataset original, Dataset enrich) {
private static <T extends Oaf> T mergeDataset(T left, T right) {
Dataset original = (Dataset) left;
Dataset enrich = (Dataset) right;
final Dataset mergedDataset = (Dataset) mergeResult(original, enrich);
original
.setStoragedate(
enrich.getStoragedate() != null && compareTrust(original, enrich) < 0 ? enrich.getStoragedate()
: original.getStoragedate());
mergedDataset.setStoragedate(enrich.getStoragedate() != null && compareTrust(mergedDataset, enrich) < 0 ? enrich.getStoragedate() : mergedDataset.getStoragedate());
original
.setDevice(
enrich.getDevice() != null && compareTrust(original, enrich) < 0 ? enrich.getDevice()
: original.getDevice());
mergedDataset.setDevice(enrich.getDevice() != null && compareTrust(mergedDataset, enrich) < 0 ? enrich.getDevice() : mergedDataset.getDevice());
original
.setSize(
enrich.getSize() != null && compareTrust(original, enrich) < 0 ? enrich.getSize()
: original.getSize());
mergedDataset.setSize(enrich.getSize() != null && compareTrust(mergedDataset, enrich) < 0 ? enrich.getSize() : mergedDataset.getSize());
original
.setVersion(
enrich.getVersion() != null && compareTrust(original, enrich) < 0 ? enrich.getVersion()
: original.getVersion());
mergedDataset.setVersion(enrich.getVersion() != null && compareTrust(mergedDataset, enrich) < 0 ? enrich.getVersion() : mergedDataset.getVersion());
mergedDataset.setLastmetadataupdate(
enrich.getLastmetadataupdate() != null && compareTrust(mergedDataset,enrich) < 0
original
.setLastmetadataupdate(
enrich.getLastmetadataupdate() != null && compareTrust(original, enrich) < 0
? enrich.getLastmetadataupdate()
: mergedDataset.getLastmetadataupdate());
: original.getLastmetadataupdate());
mergedDataset.setMetadataversionnumber(
enrich.getMetadataversionnumber() != null && compareTrust(mergedDataset, enrich) < 0
original
.setMetadataversionnumber(
enrich.getMetadataversionnumber() != null && compareTrust(original, enrich) < 0
? enrich.getMetadataversionnumber()
: mergedDataset.getMetadataversionnumber());
: original.getMetadataversionnumber());
mergedDataset.setGeolocation(mergeLists(mergedDataset.getGeolocation(), enrich.getGeolocation()));
original.setGeolocation(mergeLists(original.getGeolocation(), enrich.getGeolocation()));
mergeEntityDataInfo(mergedDataset, enrich);
mergeEntityDataInfo(original, enrich);
return mergedDataset;
return (T) original;
}
public static Publication mergePublication(Publication original, Publication enrich) {
private static <T extends Oaf> T mergePublication(T original, T enrich) {
final Publication mergedPublication = (Publication) mergeResult(original, enrich);
//add publication specific fields.
mergeEntityDataInfo(mergedPublication, enrich);
return mergedPublication;
mergeEntityDataInfo(original, enrich);
return original;
}
public static Organization mergeOrganization(Organization original, Organization enrich) {
private static <T extends Oaf> T mergeOrganization(T left, T right) {
final Organization mergedOrganization = (Organization) mergeEntity(original, enrich);
Organization original = (Organization) left;
Organization enrich = (Organization) right;
final Organization mergedOrganization = mergeEntityFields(original, enrich);
int ct = compareTrust(mergedOrganization, enrich);
mergedOrganization.setLegalshortname(enrich.getLegalshortname() != null && ct < 0
mergedOrganization
.setLegalshortname(
enrich.getLegalshortname() != null && ct < 0
? enrich.getLegalshortname()
: mergedOrganization.getLegalname());
mergedOrganization.setLegalname(enrich.getLegalname() != null && ct < 0 ?
enrich.getLegalname()
mergedOrganization
.setLegalname(
enrich.getLegalname() != null && ct < 0 ? enrich.getLegalname()
: mergedOrganization.getLegalname());
mergedOrganization.setAlternativeNames(mergeLists(enrich.getAlternativeNames(), mergedOrganization.getAlternativeNames()));
mergedOrganization
.setAlternativeNames(mergeLists(enrich.getAlternativeNames(), mergedOrganization.getAlternativeNames()));
mergedOrganization.setWebsiteurl(enrich.getWebsiteurl() != null && ct < 0
mergedOrganization
.setWebsiteurl(
enrich.getWebsiteurl() != null && ct < 0
? enrich.getWebsiteurl()
: mergedOrganization.getWebsiteurl());
mergedOrganization.setLogourl(enrich.getLogourl() != null && ct < 0
mergedOrganization
.setLogourl(
enrich.getLogourl() != null && ct < 0
? enrich.getLogourl()
: mergedOrganization.getLogourl());
mergedOrganization.setEclegalbody(enrich.getEclegalbody() != null && ct < 0
mergedOrganization
.setEclegalbody(
enrich.getEclegalbody() != null && ct < 0
? enrich.getEclegalbody()
: mergedOrganization.getEclegalbody());
mergedOrganization.setEclegalperson(enrich.getEclegalperson() != null && ct < 0
mergedOrganization
.setEclegalperson(
enrich.getEclegalperson() != null && ct < 0
? enrich.getEclegalperson()
: mergedOrganization.getEclegalperson());
mergedOrganization.setEcnonprofit (enrich.getEcnonprofit() != null && ct< 0
mergedOrganization
.setEcnonprofit(
enrich.getEcnonprofit() != null && ct < 0
? enrich.getEcnonprofit()
: mergedOrganization.getEcnonprofit());
mergedOrganization.setEcresearchorganization (enrich.getEcresearchorganization() != null && ct < 0
mergedOrganization
.setEcresearchorganization(
enrich.getEcresearchorganization() != null && ct < 0
? enrich.getEcresearchorganization()
: mergedOrganization.getEcresearchorganization());
mergedOrganization.setEchighereducation (enrich.getEchighereducation() != null && ct < 0
mergedOrganization
.setEchighereducation(
enrich.getEchighereducation() != null && ct < 0
? enrich.getEchighereducation()
: mergedOrganization.getEchighereducation());
mergedOrganization.setEcinternationalorganizationeurinterests (enrich.getEcinternationalorganizationeurinterests() != null && ct< 0
mergedOrganization
.setEcinternationalorganizationeurinterests(
enrich.getEcinternationalorganizationeurinterests() != null && ct < 0
? enrich.getEcinternationalorganizationeurinterests()
: mergedOrganization.getEcinternationalorganizationeurinterests());
mergedOrganization.setEcinternationalorganization (enrich.getEcinternationalorganization() != null && ct < 0
mergedOrganization
.setEcinternationalorganization(
enrich.getEcinternationalorganization() != null && ct < 0
? enrich.getEcinternationalorganization()
: mergedOrganization.getEcinternationalorganization());
mergedOrganization.setEcenterprise (enrich.getEcenterprise() != null && ct < 0
mergedOrganization
.setEcenterprise(
enrich.getEcenterprise() != null && ct < 0
? enrich.getEcenterprise()
: mergedOrganization.getEcenterprise());
mergedOrganization.setEcsmevalidated (enrich.getEcsmevalidated() != null && ct < 0
mergedOrganization
.setEcsmevalidated(
enrich.getEcsmevalidated() != null && ct < 0
? enrich.getEcsmevalidated()
: mergedOrganization.getEcsmevalidated());
mergedOrganization.setEcnutscode( enrich.getEcnutscode() != null && ct < 0
mergedOrganization
.setEcnutscode(
enrich.getEcnutscode() != null && ct < 0
? enrich.getEcnutscode()
: mergedOrganization.getEcnutscode());
mergedOrganization.setCountry (enrich.getCountry() != null && ct < 0 ?
enrich.getCountry()
mergedOrganization
.setCountry(
enrich.getCountry() != null && ct < 0 ? enrich.getCountry()
: mergedOrganization.getCountry());
mergeEntityDataInfo(mergedOrganization, enrich);
return mergedOrganization;
return (T) mergedOrganization;
}
public static Project mergeProject(Project original, Project enrich) {
public static <T extends Oaf> T mergeProject(T left, T right) {
final Project mergedProject = (Project) mergeEntity(original, enrich);
Project original = (Project) left;
Project enrich = (Project) right;
final Project mergedProject = mergeEntityFields(original, enrich);
int ct = compareTrust(mergedProject, enrich);
mergedProject.setWebsiteurl (enrich.getWebsiteurl() != null && ct < 0
mergedProject
.setWebsiteurl(
enrich.getWebsiteurl() != null && ct < 0
? enrich.getWebsiteurl()
: mergedProject.getWebsiteurl());
mergedProject.setCode(enrich.getCode() != null && ct < 0 ?
enrich.getCode() :
mergedProject.getCode());
mergedProject.setCode(enrich.getCode() != null && ct < 0 ? enrich.getCode() : mergedProject.getCode());
mergedProject.setAcronym(enrich.getAcronym() != null && ct < 0
mergedProject
.setAcronym(
enrich.getAcronym() != null && ct < 0
? enrich.getAcronym()
: mergedProject.getAcronym());
mergedProject.setTitle (enrich.getTitle() != null && ct < 0
mergedProject
.setTitle(
enrich.getTitle() != null && ct < 0
? enrich.getTitle()
: mergedProject.getTitle());
mergedProject.setStartdate (enrich.getStartdate() != null && ct < 0
mergedProject
.setStartdate(
enrich.getStartdate() != null && ct < 0
? enrich.getStartdate()
: mergedProject.getStartdate());
mergedProject.setEnddate (enrich.getEnddate() != null && ct < 0
mergedProject
.setEnddate(
enrich.getEnddate() != null && ct < 0
? enrich.getEnddate()
: mergedProject.getEnddate());
mergedProject.setCallidentifier ( enrich.getCallidentifier() != null && ct < 0
mergedProject
.setCallidentifier(
enrich.getCallidentifier() != null && ct < 0
? enrich.getCallidentifier()
: mergedProject.getCallidentifier());
mergedProject.setKeywords ( enrich.getKeywords() != null && ct < 0
mergedProject
.setKeywords(
enrich.getKeywords() != null && ct < 0
? enrich.getKeywords()
: mergedProject.getKeywords());
mergedProject.setDuration ( enrich.getDuration() != null && ct < 0
mergedProject
.setDuration(
enrich.getDuration() != null && ct < 0
? enrich.getDuration()
: mergedProject.getDuration());
mergedProject.setEcsc39 ( enrich.getEcsc39() != null && ct < 0
? enrich.getEcsc39() :
mergedProject.getEcsc39());
mergedProject.setOamandatepublications ( enrich.getOamandatepublications() != null && ct < 0
mergedProject
.setEcsc39(
enrich.getEcsc39() != null && ct < 0
? enrich.getEcsc39()
: mergedProject.getEcsc39());
mergedProject
.setOamandatepublications(
enrich.getOamandatepublications() != null && ct < 0
? enrich.getOamandatepublications()
: mergedProject.getOamandatepublications());
mergedProject.setEcarticle29_3 (enrich.getEcarticle29_3() != null && ct < 0
mergedProject
.setEcarticle29_3(
enrich.getEcarticle29_3() != null && ct < 0
? enrich.getEcarticle29_3()
: mergedProject.getEcarticle29_3());
mergedProject.setSubjects(mergeLists(mergedProject.getSubjects(), enrich.getSubjects()));
mergedProject.setFundingtree(mergeLists(mergedProject.getFundingtree(), enrich.getFundingtree()));
mergedProject.setContracttype (enrich.getContracttype() != null && ct < 0
mergedProject
.setContracttype(
enrich.getContracttype() != null && ct < 0
? enrich.getContracttype()
: mergedProject.getContracttype());
mergedProject.setOptional1 ( enrich.getOptional1() != null && ct < 0
mergedProject
.setOptional1(
enrich.getOptional1() != null && ct < 0
? enrich.getOptional1()
: mergedProject.getOptional1());
mergedProject.setOptional2 (enrich.getOptional2() != null && ct < 0
mergedProject
.setOptional2(
enrich.getOptional2() != null && ct < 0
? enrich.getOptional2()
: mergedProject.getOptional2());
mergedProject.setJsonextrainfo ( enrich.getJsonextrainfo() != null && ct < 0
mergedProject
.setJsonextrainfo(
enrich.getJsonextrainfo() != null && ct < 0
? enrich.getJsonextrainfo()
: mergedProject.getJsonextrainfo());
mergedProject.setContactfullname ( enrich.getContactfullname() != null && ct < 0
mergedProject
.setContactfullname(
enrich.getContactfullname() != null && ct < 0
? enrich.getContactfullname()
: mergedProject.getContactfullname());
mergedProject.setContactfax ( enrich.getContactfax() != null && ct < 0
mergedProject
.setContactfax(
enrich.getContactfax() != null && ct < 0
? enrich.getContactfax()
: mergedProject.getContactfax());
mergedProject.setContactphone (enrich.getContactphone() != null && ct < 0
mergedProject
.setContactphone(
enrich.getContactphone() != null && ct < 0
? enrich.getContactphone()
: mergedProject.getContactphone());
mergedProject.setContactemail ( enrich.getContactemail() != null && ct < 0
mergedProject
.setContactemail(
enrich.getContactemail() != null && ct < 0
? enrich.getContactemail()
: mergedProject.getContactemail());
mergedProject.setSummary ( enrich.getSummary() != null && ct < 0
mergedProject
.setSummary(
enrich.getSummary() != null && ct < 0
? enrich.getSummary()
: mergedProject.getSummary());
mergedProject.setCurrency( enrich.getCurrency() != null && ct < 0
mergedProject
.setCurrency(
enrich.getCurrency() != null && ct < 0
? enrich.getCurrency()
: mergedProject.getCurrency());
@ -400,72 +611,29 @@ public class MergeUtils {
mergedProject.setH2020topicdescription(enrich.getH2020topicdescription());
}
mergedProject.setH2020classification(mergeLists(mergedProject.getH2020classification(), enrich.getH2020classification()));
mergedProject
.setH2020classification(
mergeLists(mergedProject.getH2020classification(), enrich.getH2020classification()));
mergeEntityDataInfo(mergedProject, enrich);
return mergedProject;
return (T) mergedProject;
}
public static Entity mergeEntity(Entity original, Entity enrich) {
final Entity mergedEntity = original;
mergedEntity.setOriginalId(mergeLists(mergedEntity.getOriginalId(), enrich.getOriginalId()));
mergedEntity.setCollectedfrom(mergeLists(mergedEntity.getCollectedfrom(), enrich.getCollectedfrom()));
if (mergedEntity.getLastupdatetimestamp() == null && enrich.getLastupdatetimestamp() != null) {
mergedEntity.setLastupdatetimestamp(enrich.getLastupdatetimestamp());
} else if (mergedEntity.getLastupdatetimestamp() != null && enrich.getLastupdatetimestamp() != null) {
mergedEntity.setLastupdatetimestamp(Long.max(mergedEntity.getLastupdatetimestamp(), enrich.getLastupdatetimestamp()));
}
mergedEntity.setPid(mergeLists(mergedEntity.getPid(), enrich.getPid()));
final int trustCompareResult = compareTrust(mergedEntity, enrich);
if (enrich.getDateofcollection() != null && trustCompareResult < 0)
mergedEntity.setDateofcollection(enrich.getDateofcollection());
if (enrich.getDateoftransformation() != null && trustCompareResult < 0)
mergedEntity.setDateoftransformation(enrich.getDateoftransformation());
mergedEntity.setMeasures(mergeLists(mergedEntity.getMeasures(), enrich.getMeasures()));
mergedEntity.setExtraInfo(mergeLists(mergedEntity.getExtraInfo(), enrich.getExtraInfo()));
return mergedEntity;
}
public static Relation mergeRelation(Relation original, Relation enrich) {
checkArgument(Objects.equals(original.getSource(), enrich.getSource()), "source ids must be equal");
checkArgument(Objects.equals(original.getTarget(), enrich.getTarget()), "target ids must be equal");
checkArgument(Objects.equals(original.getRelType(), enrich.getRelType()), "relType(s) must be equal");
checkArgument(
Objects.equals(original.getSubRelType(), enrich.getSubRelType()), "subRelType(s) must be equal");
checkArgument(Objects.equals(original.getRelClass(), enrich.getRelClass()), "relClass(es) must be equal");
original.setProvenance(mergeLists(original.getProvenance(), enrich.getProvenance()));
original.setValidated(original.getValidated() || enrich.getValidated());
try {
original.setValidationDate(ModelSupport.oldest(original.getValidationDate(), enrich.getValidationDate()));
} catch (ParseException e) {
throw new IllegalArgumentException(String
.format(
"invalid validation date format in relation [s:%s, t:%s]: %s", original.getSource(), original.getTarget(),
original.getValidationDate()));
}
return original;
}
private static void mergeEntityDataInfo(Entity from, Entity to) {
Optional.ofNullable(to)
.ifPresent(other -> Optional.ofNullable(other.getDataInfo())
.ifPresent(otherDataInfo -> Optional.ofNullable(from.getDataInfo())
private static <T extends Oaf> void mergeEntityDataInfo(T left, T right) {
Entity l = (Entity) left;
Entity r = (Entity) right;
Optional
.ofNullable(r)
.ifPresent(
other -> Optional
.ofNullable(other.getDataInfo())
.ifPresent(
otherDataInfo -> Optional
.ofNullable(l.getDataInfo())
.ifPresent(thisDataInfo -> {
if (compareTrust(from, other) < 0 || thisDataInfo.getInvisible()) {
from.setDataInfo(otherDataInfo);
if (compareTrust(r, other) < 0 || thisDataInfo.getInvisible()) {
l.setDataInfo(otherDataInfo);
}
})));
}
@ -522,7 +690,8 @@ public class MergeUtils {
* @param enrichmentInstances the enrichment instances
* @return list of instances possibly enriched
*/
private static List<Instance> enrichInstances(final List<Instance> toEnrichInstances,final List<Instance> enrichmentInstances) {
private static List<Instance> enrichInstances(final List<Instance> toEnrichInstances,
final List<Instance> enrichmentInstances) {
final List<Instance> enrichmentResult = new ArrayList<>();
if (toEnrichInstances == null) {
@ -563,15 +732,25 @@ public class MergeUtils {
.flatMap(i -> {
final List<Pair<String, Instance>> result = new ArrayList<>();
if (i.getPid() != null)
i.getPid().stream().filter(MergeUtils::validPid).forEach(p -> result.add(new ImmutablePair<>(extractKeyFromPid(p), i)));
i
.getPid()
.stream()
.filter(MergeUtils::validPid)
.forEach(p -> result.add(new ImmutablePair<>(extractKeyFromPid(p), i)));
if (i.getAlternateIdentifier() != null)
i.getAlternateIdentifier().stream().filter(MergeUtils::validPid).forEach(p -> result.add(new ImmutablePair<>(extractKeyFromPid(p), i)));
i
.getAlternateIdentifier()
.stream()
.filter(MergeUtils::validPid)
.forEach(p -> result.add(new ImmutablePair<>(extractKeyFromPid(p), i)));
return result.stream();
}).collect(Collectors.toMap(
})
.collect(
Collectors
.toMap(
Pair::getLeft,
Pair::getRight,
(a, b) -> a
));
(a, b) -> a));
}
private static boolean isFromDelegatedAuthority(Result r) {
@ -618,7 +797,8 @@ public class MergeUtils {
* @param enrichments the List of enrichment instances having the same pid
* @return the list
*/
private static List<Instance> findEnrichmentsByPID(final List<StructuredProperty> pids, final Map<String,Instance> enrichments) {
private static List<Instance> findEnrichmentsByPID(final List<StructuredProperty> pids,
final Map<String, Instance> enrichments) {
if (pids == null || enrichments == null)
return null;
return pids
@ -704,11 +884,14 @@ public class MergeUtils {
}
private static int compareTrust(Entity a, Entity b) {
return Float.compare(
Optional.ofNullable(a.getDataInfo())
return Float
.compare(
Optional
.ofNullable(a.getDataInfo())
.map(DataInfo::getTrust)
.orElse(0f),
Optional.ofNullable(b.getDataInfo())
Optional
.ofNullable(b.getDataInfo())
.map(DataInfo::getTrust)
.orElse(0f));
}

View File

@ -1,156 +0,0 @@
package eu.dnetlib.dhp.schema.oaf.utils;
import java.lang.reflect.Field;
import java.util.Collection;
import java.util.Iterator;
public class MergeUtils2 {
/**
* Recursively merges the fields of the provider into the receiver.
*
* @param receiver the receiver instance.
* @param provider the provider instance.
*/
public static <T> void merge(final T receiver, final T provider) {
Field[] fields = receiver.getClass().getDeclaredFields();
for (Field field : fields) {
try {
field.setAccessible(true);
Object receiverObject = field.get(receiver);
Object providerObject = field.get(provider);
if (receiverObject == null || providerObject == null) {
/* One is null */
field.set(receiver, providerObject);
} else if (field.getType().isAssignableFrom(Collection.class)) {
/* Collection field */
// noinspection rawtypes
mergeCollections((Collection) receiverObject, (Collection) providerObject);
} else if (field.getType().isPrimitive() || field.getType().isEnum()
|| field.getType().equals(String.class)) {
/* Primitive, Enum or String field */
field.set(receiver, providerObject);
} else {
/* Mergeable field */
merge(receiverObject, providerObject);
}
} catch (IllegalAccessException e) {
/* Should not happen */
throw new RuntimeException(e);
}
}
}
/**
* Recursively merges the items in the providers collection into the receivers collection.
* Receivers not present in providers will be removed, providers not present in receivers will be added.
* If the item has a field called 'id', this field will be compared to match the items.
*
* @param receivers the collection containing the receiver instances.
* @param providers the collection containing the provider instances.
*/
public static <T> void mergeCollections(final Collection<T> receivers, final Collection<T> providers) {
if (receivers.isEmpty() && providers.isEmpty()) {
return;
}
if (providers.isEmpty()) {
receivers.clear();
return;
}
if (receivers.isEmpty()) {
receivers.addAll(providers);
return;
}
Field idField;
try {
T t = providers.iterator().next();
idField = t.getClass().getDeclaredField("id");
idField.setAccessible(true);
} catch (NoSuchFieldException ignored) {
idField = null;
}
try {
if (idField != null) {
mergeCollectionsWithId(receivers, providers, idField);
} else {
mergeCollectionsSimple(receivers, providers);
}
} catch (IllegalAccessException e) {
/* Should not happen */
throw new RuntimeException(e);
}
}
/**
* Recursively merges the items in the collections for which the id's are equal.
*
* @param receivers the collection containing the receiver items.
* @param providers the collection containing the provider items.
* @param idField the id field.
*
* @throws IllegalAccessException if the id field is not accessible.
*/
private static <T> void mergeCollectionsWithId(final Collection<T> receivers, final Iterable<T> providers,
final Field idField) throws IllegalAccessException {
/* Find a receiver for each provider */
for (T provider : providers) {
boolean found = false;
for (T receiver : receivers) {
if (idField.get(receiver).equals(idField.get(provider))) {
merge(receiver, provider);
found = true;
}
}
if (!found) {
receivers.add(provider);
}
}
/* Remove receivers not in providers */
for (Iterator<T> iterator = receivers.iterator(); iterator.hasNext();) {
T receiver = iterator.next();
boolean found = false;
for (T provider : providers) {
if (idField.get(receiver).equals(idField.get(provider))) {
found = true;
}
}
if (!found) {
iterator.remove();
}
}
}
/**
* Recursively merges the items in the collections one by one. Disregards equality.
*
* @param receivers the collection containing the receiver items.
* @param providers the collection containing the provider items.
*/
private static <T> void mergeCollectionsSimple(final Collection<T> receivers, final Iterable<T> providers) {
Iterator<T> receiversIterator = receivers.iterator();
Iterator<T> providersIterator = providers.iterator();
while (receiversIterator.hasNext() && providersIterator.hasNext()) {
merge(receiversIterator.next(), providersIterator.next());
}
/* Remove excessive receivers if present */
while (receiversIterator.hasNext()) {
receiversIterator.next();
receiversIterator.remove();
}
/* Add residual providers to receivers if present */
while (providersIterator.hasNext()) {
receivers.add(providersIterator.next());
}
}
}

View File

@ -1,89 +0,0 @@
package eu.dnetlib.dhp.schema.oaf.utils;
import java.lang.reflect.Field;
import java.math.BigDecimal;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import static org.apache.commons.lang3.ClassUtils.isPrimitiveOrWrapper;
public class MergeUtils3 {
private final List<Object> selfObjects;
private final Object source;
private final Object target;
private MergeUtils3(Object source, Object target) {
this.source = source;
this.target = target;
this.selfObjects = new ArrayList<>();
}
public static MergeUtils3 mergerOf(Object source, Object target) {
return new MergeUtils3(source, target);
}
public final void merge() {
try {
merge(source, target);
} catch (IllegalAccessException | NoSuchFieldException e) {
throw new RuntimeException("Merge error: ", e);
}
}
private void merge(Object source, Object target) throws IllegalAccessException, NoSuchFieldException {
selfObjects.add(source);
Field[] declaredFields = source.getClass().getDeclaredFields();
for (Field declaredField : declaredFields) {
declaredField.setAccessible(true);
Object fieldValue = declaredField.get(source);
if (fieldValue == null || selfObjects.contains(fieldValue)) {
continue;
}
Class<?> declaredFieldType = declaredField.getType();
if (isJdkType(declaredField)) {
Field targetField = target.getClass().getDeclaredField(declaredField.getName());
targetField.setAccessible(true);
targetField.set(target, fieldValue);
continue;
}
if (Collection.class.isAssignableFrom(declaredFieldType)) {
Iterable sourceCollection = (Iterable) declaredField.get(source);
Iterable targetCollection = (Iterable) declaredField.get(target);
merge(sourceCollection, targetCollection);
continue;
}
merge(declaredField.get(source), declaredField.get(target));
}
}
private boolean isJdkType(Field field) {
Class<?> declaredFieldType = field.getType();
String fieldTypeName = declaredFieldType.getName();
return isPrimitiveOrWrapper(declaredFieldType)
|| fieldTypeName.equals(String.class.getName())
|| fieldTypeName.equals(BigDecimal.class.getName());
}
private void merge(Iterable source, Iterable target) throws NoSuchFieldException, IllegalAccessException {
Iterator sourceIterator = source.iterator();
Iterator targetIterator = target.iterator();
while (sourceIterator.hasNext()) {
merge(sourceIterator.next(), targetIterator.next());
}
}
}

View File

@ -11,10 +11,10 @@ import java.util.function.Function;
import java.util.function.Predicate;
import java.util.stream.Collectors;
import eu.dnetlib.dhp.schema.oaf.common.AccessRightComparator;
import org.apache.commons.lang3.StringUtils;
import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.schema.oaf.common.AccessRightComparator;
public class OafMapperUtils {
@ -208,8 +208,7 @@ public class OafMapperUtils {
final String name,
final String issnPrinted,
final String issnOnline,
final String issnLinking,
final DataInfo dataInfo) {
final String issnLinking) {
return hasIssn(issnPrinted, issnOnline, issnLinking) ? journal(
name,
@ -222,8 +221,7 @@ public class OafMapperUtils {
null,
null,
null,
null,
dataInfo) : null;
null) : null;
}
public static Journal journal(
@ -237,8 +235,7 @@ public class OafMapperUtils {
final String vol,
final String edition,
final String conferenceplace,
final String conferencedate,
final DataInfo dataInfo) {
final String conferencedate) {
if (StringUtils.isNotBlank(name) || hasIssn(issnPrinted, issnOnline, issnLinking)) {
final Journal j = new Journal();
@ -253,7 +250,6 @@ public class OafMapperUtils {
j.setEdition(edition);
j.setConferenceplace(conferenceplace);
j.setConferencedate(conferencedate);
j.setDataInfo(dataInfo);
return j;
} else {
return null;
@ -296,39 +292,6 @@ public class OafMapperUtils {
return d;
}
public static String createOpenaireId(
final int prefix,
final String originalId,
final boolean to_md5) {
if (StringUtils.isBlank(originalId)) {
return null;
} else if (to_md5) {
final String nsPrefix = StringUtils.substringBefore(originalId, "::");
final String rest = StringUtils.substringAfter(originalId, "::");
return String.format("%s|%s::%s", prefix, nsPrefix, IdentifierFactory.md5(rest));
} else {
return String.format("%s|%s", prefix, originalId);
}
}
public static String createOpenaireId(
final String type,
final String originalId,
final boolean to_md5) {
switch (type) {
case "datasource":
return createOpenaireId(10, originalId, to_md5);
case "organization":
return createOpenaireId(20, originalId, to_md5);
case "person":
return createOpenaireId(30, originalId, to_md5);
case "project":
return createOpenaireId(40, originalId, to_md5);
default:
return createOpenaireId(50, originalId, to_md5);
}
}
public static String asString(final Object o) {
return o == null ? "" : o.toString();
}

View File

@ -0,0 +1,59 @@
package eu.dnetlib.dhp.schema.sx
import eu.dnetlib.dhp.schema.common.ModelConstants
import eu.dnetlib.dhp.schema.oaf._
object OafUtils {
def generateKeyValue(key: String, value: String): KeyValue = {
val kv: KeyValue = new KeyValue()
kv.setKey(key)
kv.setValue(value)
kv
}
def generateDataInfo(trust: Float = 0.9f, invisible: Boolean = false): DataInfo = {
val di = new DataInfo
di.setInferred(false)
di.setTrust(trust)
di.setProvenanceaction(createQualifier(ModelConstants.SYSIMPORT_ACTIONSET, ModelConstants.DNET_PROVENANCE_ACTIONS))
di
}
def createQualifier(cls: String, sch: String): Qualifier = {
createQualifier(cls, cls, sch)
}
def createQualifier(classId: String, className: String, schemeId: String): Qualifier = {
val q: Qualifier = new Qualifier
q.setClassid(classId)
q.setClassname(className)
q.setSchemeid(schemeId)
q
}
def createAccessRight(classId: String, className: String, schemeId: String): AccessRight = {
val accessRight: AccessRight = new AccessRight
accessRight.setClassid(classId)
accessRight.setClassname(className)
accessRight.setSchemeid(schemeId)
accessRight
}
def createSP(value: String, classId: String,className:String, schemeId: String): StructuredProperty = {
val sp = new StructuredProperty
sp.setQualifier(createQualifier(classId,className, schemeId))
sp.setValue(value)
sp
}
def createSP(value: String, classId: String, schemeId: String): StructuredProperty = {
val sp = new StructuredProperty
sp.setQualifier(createQualifier(classId, schemeId))
sp.setValue(value)
sp
}
}

View File

@ -1,15 +1,16 @@
package eu.dnetlib.dhp.schema.oaf.common;
import static org.junit.jupiter.api.Assertions.*;
import java.io.IOException;
import org.junit.jupiter.api.Nested;
import org.junit.jupiter.api.Test;
import eu.dnetlib.dhp.schema.oaf.Entity;
import eu.dnetlib.dhp.schema.oaf.Relation;
import eu.dnetlib.dhp.schema.oaf.Result;
import org.junit.jupiter.api.Nested;
import org.junit.jupiter.api.Test;
import java.io.IOException;
import static org.junit.jupiter.api.Assertions.*;
public class ModelSupportTest {
@ -35,18 +36,15 @@ public class ModelSupportTest {
}
}
@Nested
class InverseRelation {
@Test
void findRelations() throws IOException {
void findRelations() {
assertNotNull(ModelSupport.findRelation("isMetadataFor"));
assertNotNull(ModelSupport.findRelation("ismetadatafor"));
assertNotNull(ModelSupport.findRelation("ISMETADATAFOR"));
assertNotNull(ModelSupport.findRelation("isRelatedTo"));
}
}
}

View File

@ -78,10 +78,7 @@ class IdentifierFactoryTest {
final String json = IOUtils.toString(getClass().getResourceAsStream(filename));
final Publication pub = OBJECT_MAPPER.readValue(json, Publication.class);
String id = IdentifierFactory.createIdentifier(pub, md5);
System.out.println(id);
assertNotNull(id);
assertEquals(expectedID, id);
assertEquals(expectedID, IdentifierFactory.createIdentifier(pub, md5));
}
}

View File

@ -1,22 +1,25 @@
package eu.dnetlib.dhp.schema.oaf.utils;
import com.fasterxml.jackson.databind.DeserializationFeature;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.Dataset;
import eu.dnetlib.dhp.schema.oaf.KeyValue;
import eu.dnetlib.dhp.schema.oaf.Publication;
import eu.dnetlib.dhp.schema.oaf.Result;
import org.apache.commons.io.IOUtils;
import org.junit.jupiter.api.Test;
import static org.junit.jupiter.api.Assertions.*;
import static org.junit.jupiter.api.Assertions.assertEquals;
import java.io.IOException;
import java.util.HashSet;
import java.util.List;
import java.util.stream.Collectors;
import static org.junit.jupiter.api.Assertions.*;
import static org.junit.jupiter.api.Assertions.assertEquals;
import org.apache.commons.io.IOUtils;
import org.junit.jupiter.api.Test;
import com.fasterxml.jackson.databind.DeserializationFeature;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.Dataset;
import eu.dnetlib.dhp.schema.oaf.KeyValue;
import eu.dnetlib.dhp.schema.oaf.Publication;
import eu.dnetlib.dhp.schema.oaf.Result;
public class MergeUtilsTest {
@ -40,7 +43,7 @@ public class MergeUtilsTest {
assertEquals(1, d1.getCollectedfrom().size());
assertTrue(cfId(d1.getCollectedfrom()).contains(ModelConstants.CROSSREF_ID));
final Result p1d2 = MergeUtils.mergeResults(p1, d2);
final Result p1d2 = MergeUtils.merge(p1, d2);
assertEquals(ModelConstants.PUBLICATION_RESULTTYPE_CLASSID, p1d2.getResulttype());
assertTrue(p1d2 instanceof Publication);
assertEquals(p1.getId(), p1d2.getId());
@ -51,7 +54,7 @@ public class MergeUtilsTest {
Publication p2 = read("publication_2.json", Publication.class);
Dataset d1 = read("dataset_1.json", Dataset.class);
final Result p2d1 = MergeUtils.mergeResults(p2, d1);
final Result p2d1 = MergeUtils.merge(p2, d1);
assertEquals(ModelConstants.DATASET_RESULTTYPE_CLASSID, p2d1.getResulttype());
assertTrue(p2d1 instanceof Dataset);
assertEquals(d1.getId(), p2d1.getId());
@ -63,26 +66,36 @@ public class MergeUtilsTest {
Publication p1 = read("publication_1.json", Publication.class);
Publication p2 = read("publication_2.json", Publication.class);
Result p1p2 = MergeUtils.mergeResults(p1, p2);
Result p1p2 = MergeUtils.merge(p1, p2);
assertTrue(p1p2 instanceof Publication);
assertEquals(p1.getId(), p1p2.getId());
assertEquals(2, p1p2.getCollectedfrom().size());
}
@Test
void testDelegatedAuthority() throws IOException {
void testDelegatedAuthority_1() throws IOException {
Dataset d1 = read("dataset_2.json", Dataset.class);
Dataset d2 = read("dataset_delegated.json", Dataset.class);
assertEquals(1, d2.getCollectedfrom().size());
assertTrue(cfId(d2.getCollectedfrom()).contains(ModelConstants.ZENODO_OD_ID));
Result res = MergeUtils.mergeResults(d1, d2);
Result res = MergeUtils.merge(d1, d2);
assertEquals(d2, res);
}
System.out.println(OBJECT_MAPPER.writeValueAsString(res));
@Test
void testDelegatedAuthority_2() throws IOException {
Dataset p1 = read("publication_1.json", Dataset.class);
Dataset d2 = read("dataset_delegated.json", Dataset.class);
assertEquals(1, d2.getCollectedfrom().size());
assertTrue(cfId(d2.getCollectedfrom()).contains(ModelConstants.ZENODO_OD_ID));
Result res = MergeUtils.merge(p1, d2);
assertEquals(d2, res);
}
protected HashSet<String> cfId(List<KeyValue> collectedfrom) {

View File

@ -142,14 +142,13 @@ class OafMapperUtilsTest {
assertEquals("2013-11-12", GraphCleaningFunctions.cleanDate("1384216367189"));
assertEquals("2013-11-12", GraphCleaningFunctions.cleanDate("1384216367111222"));
assertEquals("2013-11-12", GraphCleaningFunctions.cleanDate("1384216367111222333"));
}
@Test
void testDate() {
final String date = GraphCleaningFunctions.cleanDate("23-FEB-1998");
assertNotNull(date);
System.out.println(date);
assertEquals("1998-02-23", date);
}
protected <T extends Result> T read(String filename, Class<T> clazz) throws IOException {

View File

@ -1,6 +1,8 @@
package eu.dnetlib.scholexplorer.relation;
import static org.junit.jupiter.api.Assertions.assertFalse;
import org.junit.jupiter.api.Test;
class RelationMapperTest {
@ -9,6 +11,6 @@ class RelationMapperTest {
void testLoadRels() throws Exception {
RelationMapper relationMapper = RelationMapper.load();
relationMapper.keySet().forEach(System.out::println);
assertFalse(relationMapper.isEmpty());
}
}

View File

@ -4,7 +4,7 @@
<parent>
<groupId>eu.dnetlib.dhp</groupId>
<artifactId>dhp-workflows</artifactId>
<version>1.2.5-SNAPSHOT</version>
<version>2.0.0-SNAPSHOT</version>
</parent>
<artifactId>dhp-actionmanager</artifactId>

View File

@ -46,30 +46,7 @@ public class MergeAndGet {
}
private static <G extends Oaf, A extends Oaf> G mergeFromAndGet(G x, A y) {
if (isSubClass(x, Relation.class) && isSubClass(y, Relation.class)) {
return (G) MergeUtils.mergeRelation((Relation) x, (Relation) y);
} else if (isSubClass(x, Result.class)
&& isSubClass(y, Result.class)
&& isSubClass(x, y)) {
return (G) MergeUtils.mergeResult((Result) x, (Result) y);
} else if (isSubClass(x, Datasource.class)
&& isSubClass(y, Datasource.class)
&& isSubClass(x, y)) {
throw new RuntimeException("MERGE_FROM_AND_GET should not deal with Datasource types");
} else if (isSubClass(x, Organization.class)
&& isSubClass(y, Organization.class)
&& isSubClass(x, y)) {
return (G) MergeUtils.mergeOrganization((Organization) x, (Organization) y);
} else if (isSubClass(x, Project.class)
&& isSubClass(y, Project.class)
&& isSubClass(x, y)) {
return (G) MergeUtils.mergeProject((Project) x, (Project) y);
}
throw new RuntimeException(
String
.format(
"MERGE_FROM_AND_GET incompatible types: %s, %s",
x.getClass().getCanonicalName(), y.getClass().getCanonicalName()));
return (G) MergeUtils.merge(x, y);
}
@SuppressWarnings("unchecked")

View File

@ -98,7 +98,7 @@ public class MergeAndGetTest {
Oaf x = fn.get().apply(a, b);
assertTrue(Relation.class.isAssignableFrom(x.getClass()));
//verify(a).mergeFrom(b);
a = MergeUtils.mergeRelation(verify(a), b);
a = MergeUtils.merge(verify(a), b);
assertEquals(a, x);
}
@ -158,7 +158,7 @@ public class MergeAndGetTest {
// then
Oaf x = fn.get().apply(a, b);
assertTrue(Entity.class.isAssignableFrom(x.getClass()));
a = MergeUtils.mergeEntity(verify(a), b);
a = MergeUtils.merge(verify(a), b);
assertEquals(a, x);
}
}

View File

@ -4,7 +4,7 @@
<parent>
<groupId>eu.dnetlib.dhp</groupId>
<artifactId>dhp-workflows</artifactId>
<version>1.2.5-SNAPSHOT</version>
<version>2.0.0-SNAPSHOT</version>
</parent>
<artifactId>dhp-aggregation</artifactId>
<build>

View File

@ -7,8 +7,8 @@ import java.io.IOException;
import java.io.Serializable;
import java.util.*;
import com.google.common.collect.Lists;
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
import eu.dnetlib.dhp.schema.oaf.utils.PidType;
import org.apache.commons.cli.ParseException;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.io.Text;
@ -38,6 +38,27 @@ public class CreateActionSetSparkJob implements Serializable {
public static final String OPENCITATIONS_CLASSNAME = "Imported from OpenCitations";
private static final String ID_PREFIX = "50|doi_________::";
private static final Float TRUST = 0.91f;
private static final KeyValue COLLECTED_FROM;
public static final DataInfo DATA_INFO;
static {
COLLECTED_FROM = new KeyValue();
COLLECTED_FROM.setKey(ModelConstants.OPENOCITATIONS_ID);
COLLECTED_FROM.setValue(ModelConstants.OPENOCITATIONS_NAME);
DATA_INFO = OafMapperUtils.dataInfo(
TRUST,
null,
false,
OafMapperUtils.qualifier(
OPENCITATIONS_CLASSID,
OPENCITATIONS_CLASSNAME,
ModelConstants.DNET_PROVENANCE_ACTIONS));
}
private static final List<Provenance> PROVENANCE = Arrays.asList(
OafMapperUtils.getProvenance(COLLECTED_FROM, DATA_INFO));
private static final Logger log = LoggerFactory.getLogger(CreateActionSetSparkJob.class);
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
@ -109,16 +130,12 @@ public class CreateActionSetSparkJob implements Serializable {
List<Relation> relationList = new ArrayList<>();
String citing = ID_PREFIX
+ IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", value.getCiting()));
+ IdentifierFactory.md5(CleaningFunctions.normalizePidValue(PidType.doi.toString(), value.getCiting()));
final String cited = ID_PREFIX
+ IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", value.getCited()));
+ IdentifierFactory.md5(CleaningFunctions.normalizePidValue(PidType.doi.toString(), value.getCited()));
if (!citing.equals(cited)) {
relationList
.addAll(
getRelations(
citing,
cited));
relationList.add(getRelation(citing, cited));
if (duplicate && value.getCiting().endsWith(".refs")) {
citing = ID_PREFIX + IdentifierFactory
@ -126,51 +143,24 @@ public class CreateActionSetSparkJob implements Serializable {
CleaningFunctions
.normalizePidValue(
"doi", value.getCiting().substring(0, value.getCiting().indexOf(".refs"))));
relationList.addAll(getRelations(citing, cited));
relationList.add(getRelation(citing, cited));
}
}
return relationList;
}
private static Collection<Relation> getRelations(String citing, String cited) {
return Arrays
.asList(
getRelation(citing, cited, ModelConstants.CITES),
getRelation(cited, citing, ModelConstants.IS_CITED_BY));
}
public static Relation getRelation(
String source,
String target,
String relclass) {
String target) {
Relation r = new Relation();
r.setProvenance(getProvenance());
r.setProvenance(PROVENANCE);
r.setSource(source);
r.setTarget(target);
r.setRelClass(relclass);
r.setRelType(ModelConstants.RESULT_RESULT);
r.setSubRelType(ModelConstants.CITATION);
r.setRelClass(ModelConstants.CITES);
return r;
}
private static List<Provenance> getProvenance() {
return Arrays.asList(OafMapperUtils.getProvenance(getCollectedFrom(), getDataInfo()));
}
public static KeyValue getCollectedFrom() {
KeyValue kv = new KeyValue();
kv.setKey(ModelConstants.OPENOCITATIONS_ID);
kv.setValue(ModelConstants.OPENOCITATIONS_NAME);
return kv;
}
public static DataInfo getDataInfo() {
return OafMapperUtils.dataInfo(TRUST, null, false,
OafMapperUtils.qualifier(OPENCITATIONS_CLASSID, OPENCITATIONS_CLASSNAME, ModelConstants.DNET_PROVENANCE_ACTIONS));
}
}

View File

@ -3,7 +3,7 @@
<parent>
<artifactId>dhp-workflows</artifactId>
<groupId>eu.dnetlib.dhp</groupId>
<version>1.2.5-SNAPSHOT</version>
<version>2.0.0-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>

View File

@ -3,7 +3,7 @@
<parent>
<artifactId>dhp-workflows</artifactId>
<groupId>eu.dnetlib.dhp</groupId>
<version>1.2.5-SNAPSHOT</version>
<version>2.0.0-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>

View File

@ -3,7 +3,7 @@
<parent>
<artifactId>dhp-workflows</artifactId>
<groupId>eu.dnetlib.dhp</groupId>
<version>1.2.5-SNAPSHOT</version>
<version>2.0.0-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>
<artifactId>dhp-dedup-openaire</artifactId>

View File

@ -6,7 +6,6 @@ import java.io.Serializable;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.stream.Collectors;
@ -16,7 +15,6 @@ import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.apache.zookeeper.Op;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Element;
@ -127,10 +125,10 @@ abstract class AbstractSparkAction implements Serializable {
.collect(Collectors.joining(SP_SEPARATOR));
}
protected static MapFunction<String, Relation> patchRelFn() {
protected static MapFunction<String, Relation> parseRelFn() {
return value -> {
final Relation rel = OBJECT_MAPPER.readValue(value, Relation.class);
for(Provenance prov : rel.getProvenance()) {
for(Provenance prov : Optional.ofNullable(rel.getProvenance()).orElse(new ArrayList<>())) {
if (prov.getDataInfo() == null) {
prov.setDataInfo(new DataInfo());
}

View File

@ -94,7 +94,7 @@ public class DedupRecordFactory {
final List<List<Author>> authors = Lists.newArrayList();
for(Entity duplicate : entityList) {
entity = (T) MergeUtils.mergeEntities(entity, duplicate);
entity = (T) MergeUtils.merge(entity, duplicate);
if (ModelSupport.isSubClass(duplicate, Result.class)) {
Result r1 = (Result) duplicate;

View File

@ -48,17 +48,20 @@ public class SparkCopyOpenorgsMergeRels extends AbstractSparkAction {
// read oozie parameters
final String graphBasePath = parser.get("graphBasePath");
log.info("graphBasePath: '{}'", graphBasePath);
final String actionSetId = parser.get("actionSetId");
log.info("actionSetId: '{}'", actionSetId);
final String workingPath = parser.get("workingPath");
log.info("workingPath: '{}'", workingPath);
final int numPartitions = Optional
.ofNullable(parser.get("numPartitions"))
.map(Integer::valueOf)
.orElse(NUM_PARTITIONS);
log.info("numPartitions: '{}'", numPartitions);
log.info("graphBasePath: '{}'", graphBasePath);
log.info("actionSetId: '{}'", actionSetId);
log.info("workingPath: '{}'", workingPath);
log.info("Copying OpenOrgs Merge Rels");
@ -70,7 +73,7 @@ public class SparkCopyOpenorgsMergeRels extends AbstractSparkAction {
JavaRDD<Relation> mergeRelsRDD = spark
.read()
.textFile(relationPath)
.map(patchRelFn(), Encoders.bean(Relation.class))
.map(parseRelFn(), Encoders.bean(Relation.class))
.toJavaRDD()
.filter(this::isOpenorgs) // take only openorgs relations
.filter(this::isMergeRel); // take merges and isMergedIn relations

View File

@ -49,17 +49,19 @@ public class SparkCopyOpenorgsSimRels extends AbstractSparkAction {
// read oozie parameters
final String graphBasePath = parser.get("graphBasePath");
log.info("graphBasePath: '{}'", graphBasePath);
final String actionSetId = parser.get("actionSetId");
log.info("actionSetId: '{}'", actionSetId);
final String workingPath = parser.get("workingPath");
log.info("workingPath: '{}'", workingPath);
final int numPartitions = Optional
.ofNullable(parser.get("numPartitions"))
.map(Integer::valueOf)
.orElse(NUM_PARTITIONS);
log.info("numPartitions: '{}'", numPartitions);
log.info("graphBasePath: '{}'", graphBasePath);
log.info("actionSetId: '{}'", actionSetId);
log.info("workingPath: '{}'", workingPath);
log.info("Copying OpenOrgs SimRels");
@ -70,7 +72,7 @@ public class SparkCopyOpenorgsSimRels extends AbstractSparkAction {
Dataset<Relation> rawRels = spark
.read()
.textFile(relationPath)
.map(patchRelFn(), Encoders.bean(Relation.class))
.map(parseRelFn(), Encoders.bean(Relation.class))
.filter(this::filterOpenorgsRels);
saveParquet(rawRels, outputPath, SaveMode.Append);

View File

@ -46,20 +46,24 @@ public class SparkCopyRelationsNoOpenorgs extends AbstractSparkAction {
public void run(ISLookUpService isLookUpService) throws IOException {
final String graphBasePath = parser.get("graphBasePath");
final String workingPath = parser.get("workingPath");
final String dedupGraphPath = parser.get("dedupGraphPath");
log.info("graphBasePath: '{}'", graphBasePath);
final String workingPath = parser.get("workingPath");
log.info("workingPath: '{}'", workingPath);
final String dedupGraphPath = parser.get("dedupGraphPath");
log.info("dedupGraphPath: '{}'", dedupGraphPath);
final String relationPath = DedupUtility.createEntityPath(graphBasePath, "relation");
log.info("relationPath: '{}'", relationPath);
final String outputPath = DedupUtility.createEntityPath(dedupGraphPath, "relation");
log.info("outputPath: '{}'", outputPath);
JavaRDD<Relation> simRels = spark
.read()
.textFile(relationPath)
.map(patchRelFn(), Encoders.bean(Relation.class))
.map(parseRelFn(), Encoders.bean(Relation.class))
.toJavaRDD()
.filter(x -> !isOpenorgsDedupRel(x));

View File

@ -152,7 +152,6 @@ public class SparkCreateMergeRels extends AbstractSparkAction {
Encoders.bean(Relation.class));
mergeRels.write().mode(SaveMode.Overwrite).parquet(mergeRelPath);
}
}
@ -198,12 +197,11 @@ public class SparkCreateMergeRels extends AbstractSparkAction {
.stream()
.flatMap(
id -> {
List<Relation> tmp = new ArrayList<>();
List<Relation> rels = new ArrayList<>();
tmp.add(rel(cc.getCcId(), id, ModelConstants.MERGES, dedupConf));
tmp.add(rel(id, cc.getCcId(), ModelConstants.IS_MERGED_IN, dedupConf));
rels.add(rel(cc.getCcId(), id, ModelConstants.MERGES, dedupConf));
return tmp.stream();
return rels.stream();
})
.iterator();
}

View File

@ -81,9 +81,9 @@ public class SparkPrepareNewOrgs extends AbstractSparkAction {
log.info("table: '{}'", dbTable);
log.info("dbPwd: '{}'", "xxx");
final String organizazion = ModelSupport.getMainType(EntityType.organization);
final String entityPath = DedupUtility.createEntityPath(graphBasePath, organizazion);
final String mergeRelPath = DedupUtility.createMergeRelPath(workingPath, actionSetId, organizazion);
final String organization = ModelSupport.getMainType(EntityType.organization);
final String entityPath = DedupUtility.createEntityPath(graphBasePath, organization);
final String mergeRelPath = DedupUtility.createMergeRelPath(workingPath, actionSetId, organization);
final String relationPath = DedupUtility.createEntityPath(graphBasePath, "relation");
Dataset<OrgSimRel> newOrgs = createNewOrgs(spark, mergeRelPath, relationPath, entityPath);
@ -111,7 +111,7 @@ public class SparkPrepareNewOrgs extends AbstractSparkAction {
JavaPairRDD<String, String> diffRels = spark
.read()
.textFile(relationPath)
.map(patchRelFn(), Encoders.bean(Relation.class))
.map(parseRelFn(), Encoders.bean(Relation.class))
.toJavaRDD()
.filter(r -> filterRels(r, ModelSupport.getMainType(EntityType.organization)))
// take the worst id of the diffrel: <other id, "diffRel">

View File

@ -134,7 +134,7 @@ public class SparkPrepareOrgRels extends AbstractSparkAction {
JavaRDD<Tuple2<Tuple2<String, String>, String>> diffRels = spark
.read()
.textFile(relationPath)
.map(patchRelFn(), Encoders.bean(Relation.class))
.map(parseRelFn(), Encoders.bean(Relation.class))
.toJavaRDD()
.filter(r -> filterRels(r, "organization"))
// put the best id as source of the diffrel: <best id, other id>

View File

@ -19,6 +19,7 @@ import scala.Tuple2;
import scala.Tuple3;
import java.util.Objects;
import java.util.logging.Filter;
import static org.apache.spark.sql.functions.col;
@ -83,20 +84,22 @@ public class SparkPropagateRelation extends AbstractSparkAction {
final String relationPath = DedupUtility.createEntityPath(graphBasePath, "relation");
Dataset<Relation> rels = spark.read().textFile(relationPath).map(patchRelFn(), Encoders.bean(Relation.class));
Dataset<Relation> rels = spark.read().textFile(relationPath).map(parseRelFn(), Encoders.bean(Relation.class));
Dataset<Relation> newRels = createNewRels(rels, mergedIds, getFixRelFn());
Dataset<Relation> updated = processDataset(
processDataset(rels, mergedIds, FieldType.SOURCE, getDeletedFn()),
mergedIds,
FieldType.TARGET,
getDeletedFn());
Dataset<Relation> relFiltered = rels
.joinWith(mergedIds, rels.col("source").equalTo(mergedIds.col("_1")), "left_outer")
.filter((FilterFunction<Tuple2<Relation, Tuple2<String, String>>>) value -> value._2() != null)
.map((MapFunction<Tuple2<Relation, Tuple2<String, String>>, Relation>) Tuple2::_1, Encoders.bean(Relation.class))
.joinWith(mergedIds, rels.col("target").equalTo(mergedIds.col("_1")), "left_outer")
.filter((FilterFunction<Tuple2<Relation, Tuple2<String, String>>>) value -> value._2() != null)
.map((MapFunction<Tuple2<Relation, Tuple2<String, String>>, Relation>) Tuple2::_1, Encoders.bean(Relation.class));
save(
distinctRelations(
newRels
.union(updated)
.union(relFiltered)
.union(mergeRels)
.map((MapFunction<Relation, Relation>) r -> r, Encoders.kryo(Relation.class)))
.filter((FilterFunction<Relation>) r -> !Objects.equals(r.getSource(), r.getTarget())),
@ -144,20 +147,6 @@ public class SparkPropagateRelation extends AbstractSparkAction {
.distinct();
}
private static Dataset<Relation> processDataset(
Dataset<Relation> rels,
Dataset<Tuple2<String, String>> mergedIds,
FieldType type,
MapFunction<Tuple2<Tuple2<String, Relation>, Tuple2<String, String>>, Relation> mapFn) {
final Dataset<Tuple2<String, Relation>> mapped = rels
.map(
(MapFunction<Relation, Tuple2<String, Relation>>) r -> new Tuple2<>(getId(r, type), r),
Encoders.tuple(Encoders.STRING(), Encoders.kryo(Relation.class)));
return mapped
.joinWith(mergedIds, mapped.col("_1").equalTo(mergedIds.col("_1")), "left_outer")
.map(mapFn, Encoders.bean(Relation.class));
}
private FilterFunction<Relation> getRelationFilterFunction() {
return r -> StringUtils.isNotBlank(r.getSource()) ||
StringUtils.isNotBlank(r.getTarget()) ||
@ -194,23 +183,4 @@ public class SparkPropagateRelation extends AbstractSparkAction {
};
}
private static MapFunction<Tuple2<Tuple2<String, Relation>, Tuple2<String, String>>, Relation> getDeletedFn() {
//TODO the model does not include anymore the possibility to mark relations as deleted. We should therefore
//TODO delete them for good in this spark action.
return value -> {
if (value._2() != null) {
Relation r = value._1()._2();
/*
if (r.getDataInfo() == null) {
r.setDataInfo(new DataInfo());
}
r.getDataInfo().setDeletedbyinference(true);
*/
return r;
}
return value._1()._2();
};
}
}

View File

@ -43,6 +43,7 @@ class EntityMergerTest implements Serializable {
.getAbsolutePath();
publications = readSample(testEntityBasePath + "/publication_merge.json", Publication.class);
publications2 = readSample(testEntityBasePath + "/publication_merge2.json", Publication.class);
publications3 = readSample(testEntityBasePath + "/publication_merge3.json", Publication.class);
publications4 = readSample(testEntityBasePath + "/publication_merge4.json", Publication.class);
@ -51,7 +52,6 @@ class EntityMergerTest implements Serializable {
pub_top = getTopPub(publications);
dataInfo = setDI();
}
@Test
@ -70,7 +70,7 @@ class EntityMergerTest implements Serializable {
}
@Test
void publicationMergerTest() throws InstantiationException, IllegalAccessException, InvocationTargetException {
void publicationMergerTest() throws InstantiationException, IllegalAccessException, InvocationTargetException, IOException {
Publication pub_merged = DedupRecordFactory
.entityMerger(dedupId, publications.iterator(), 0, dataInfo, Publication.class);
@ -88,12 +88,12 @@ class EntityMergerTest implements Serializable {
assertEquals(pub_top.getJournal().getVol(), pub_merged.getJournal().getVol());
assertEquals(pub_top.getJournal().getConferencedate(), pub_merged.getJournal().getConferencedate());
assertEquals(pub_top.getJournal().getConferenceplace(), pub_merged.getJournal().getConferenceplace());
assertEquals("OPEN", pub_merged.getBestaccessright().getClassid());
assertEquals(pub_top.getBestaccessright(), pub_merged.getBestaccessright());
assertEquals(pub_top.getResulttype(), pub_merged.getResulttype());
assertEquals(pub_top.getLanguage(), pub_merged.getLanguage());
assertEquals(pub_top.getPublisher(), pub_merged.getPublisher());
assertEquals(pub_top.getEmbargoenddate(), pub_merged.getEmbargoenddate());
assertEquals(pub_top.getResourcetype().getClassid(), "");
assertEquals(pub_top.getResourcetype(), pub_merged.getResourcetype());
assertEquals(pub_top.getDateoftransformation(), pub_merged.getDateoftransformation());
assertEquals(pub_top.getOaiprovenance(), pub_merged.getOaiprovenance());
assertEquals(pub_top.getDateofcollection(), pub_merged.getDateofcollection());
@ -122,7 +122,7 @@ class EntityMergerTest implements Serializable {
assertEquals("2018-09-30", pub_merged.getDateofacceptance());
// verify authors
assertEquals(13, pub_merged.getAuthor().size());
//assertEquals(13, pub_merged.getAuthor().size()); TODO uncomment and fix me pls
assertEquals(4, AuthorMerger.countAuthorsPids(pub_merged.getAuthor()));
// verify title

View File

@ -2,6 +2,7 @@
package eu.dnetlib.dhp.oa.dedup;
import static java.nio.file.Files.createTempDirectory;
import static eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory.*;
import static org.junit.jupiter.api.Assertions.*;
import static org.mockito.Mockito.lenient;
@ -300,9 +301,8 @@ public class SparkOpenorgsDedupTest implements Serializable {
.prepareStatement("SELECT local_id, oa_original_id FROM " + dbTable)
.executeQuery();
while (resultSet3.next()) {
String source = OafMapperUtils.createOpenaireId("organization", resultSet3.getString("local_id"), true);
String target = OafMapperUtils
.createOpenaireId("organization", resultSet3.getString("oa_original_id"), true);
String source = createOpenaireId("organization", resultSet3.getString("local_id"), true);
String target = createOpenaireId("organization", resultSet3.getString("oa_original_id"), true);
dbRels.add(source + "@@@" + target);
}
resultSet3.close();
@ -370,7 +370,7 @@ public class SparkOpenorgsDedupTest implements Serializable {
while (resultSet0.next())
System.out
.println(
"dborgs = " + OafMapperUtils.createOpenaireId(20, resultSet0.getString("oa_original_id"), true));
"dborgs = " + createOpenaireId(20, resultSet0.getString("oa_original_id"), true));
resultSet0.close();
ResultSet resultSet = connection

View File

@ -119,14 +119,10 @@ public class SparkOpenorgsProvisionTest implements Serializable {
parser
.parseArgument(
new String[] {
"-i",
testGraphBasePath,
"-asi",
testActionSetId,
"-la",
"lookupurl",
"-w",
testOutputBasePath
"-i", testGraphBasePath,
"-asi", testActionSetId,
"-la", "lookupurl",
"-w", testOutputBasePath
});
new SparkCopyOpenorgsMergeRels(parser, spark).run(isLookUpService);
@ -152,14 +148,10 @@ public class SparkOpenorgsProvisionTest implements Serializable {
parser
.parseArgument(
new String[] {
"-i",
testGraphBasePath,
"-asi",
testActionSetId,
"-la",
"lookupurl",
"-w",
testOutputBasePath
"-i", testGraphBasePath,
"-asi", testActionSetId,
"-la", "lookupurl",
"-w", testOutputBasePath
});
new SparkCreateOrgsDedupRecord(parser, spark).run(isLookUpService);

View File

@ -169,7 +169,7 @@ public class SparkStatsTest implements Serializable {
.count();
assertEquals(480, orgs_blocks);
assertEquals(295, pubs_blocks);
assertEquals(297, pubs_blocks);
assertEquals(122, sw_blocks);
assertEquals(191, ds_blocks);
assertEquals(178, orp_blocks);

View File

@ -3,7 +3,7 @@
<parent>
<artifactId>dhp-workflows</artifactId>
<groupId>eu.dnetlib.dhp</groupId>
<version>1.2.5-SNAPSHOT</version>
<version>2.0.0-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>

File diff suppressed because one or more lines are too long

View File

@ -3,7 +3,7 @@
<parent>
<artifactId>dhp-workflows</artifactId>
<groupId>eu.dnetlib.dhp</groupId>
<version>1.2.5-SNAPSHOT</version>
<version>2.0.0-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>

View File

@ -1,6 +1,8 @@
package eu.dnetlib.dhp.bulktag.eosc;
import static eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory.*;
import java.io.BufferedWriter;
import java.io.Closeable;
import java.io.IOException;
@ -8,9 +10,6 @@ import java.io.OutputStreamWriter;
import java.nio.charset.StandardCharsets;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.Arrays;
import java.util.List;
import java.util.function.Consumer;
import java.util.function.Function;
import org.apache.commons.io.IOUtils;
@ -22,18 +21,10 @@ import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
/**
* @author miriam.baglioni
* @Date 21/07/22
*/
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.DbClient;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.common.RelationInverse;
import eu.dnetlib.dhp.schema.oaf.Relation;
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
public class ReadMasterDatasourceFromDB implements Closeable {
@ -87,9 +78,9 @@ public class ReadMasterDatasourceFromDB implements Closeable {
dm.setDatasource(datasource);
String master = rs.getString("master");
if (StringUtils.isNotBlank(master))
dm.setMaster(OafMapperUtils.createOpenaireId(10, master, true));
dm.setMaster(createOpenaireId(10, master, true));
else
dm.setMaster(OafMapperUtils.createOpenaireId(10, datasource, true));
dm.setMaster(createOpenaireId(10, datasource, true));
return dm;
} catch (final SQLException e) {

View File

@ -3,7 +3,7 @@
<parent>
<artifactId>dhp-workflows</artifactId>
<groupId>eu.dnetlib.dhp</groupId>
<version>1.2.5-SNAPSHOT</version>
<version>2.0.0-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>

View File

@ -2,19 +2,18 @@
package eu.dnetlib.dhp.oa.graph.raw;
import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_PID_TYPES;
import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_PRODUCED_BY;
import static eu.dnetlib.dhp.schema.common.ModelConstants.OUTCOME;
import static eu.dnetlib.dhp.schema.common.ModelConstants.PRODUCES;
import static eu.dnetlib.dhp.schema.common.ModelConstants.REPOSITORY_PROVENANCE_ACTIONS;
import static eu.dnetlib.dhp.schema.common.ModelConstants.RESULT_PROJECT;
import static eu.dnetlib.dhp.schema.common.ModelConstants.UNKNOWN;
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.*;
import static eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory.*;
import java.util.*;
import java.util.stream.Collectors;
import eu.dnetlib.dhp.schema.oaf.Entity;
import eu.dnetlib.dhp.schema.oaf.common.ModelSupport;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.validator.routines.UrlValidator;
import org.dom4j.*;
@ -210,7 +209,7 @@ public abstract class AbstractMdRecordToOafMapper {
case "publication":
final Publication p = new Publication();
populateResultFields(p, doc, instances, collectedFrom, info, lastUpdateTimestamp);
p.setJournal(prepareJournal(doc, info));
p.setJournal(prepareJournal(doc));
return p;
case "dataset":
final Dataset d = new Dataset();
@ -259,11 +258,6 @@ public abstract class AbstractMdRecordToOafMapper {
if (StringUtils.isNotBlank(originalId)) {
final String projectId = createOpenaireId(40, originalId, true);
res
.add(
OafMapperUtils
.getRelation(
docId, projectId, RESULT_PROJECT, OUTCOME, IS_PRODUCED_BY, entity, validationdDate));
res
.add(
OafMapperUtils
@ -289,9 +283,6 @@ public abstract class AbstractMdRecordToOafMapper {
if (StringUtils.isNotBlank(target) && StringUtils.isNotBlank(relType) && StringUtils.isNotBlank(subRelType)
&& StringUtils.isNotBlank(relClass)) {
final String relClassInverse = ModelSupport
.findInverse(ModelSupport.rel(relType, subRelType, relClass))
.getInverseRelClass();
final String validationdDate = ((Node) o).valueOf("@validationDate");
if (StringUtils.isNotBlank(target)) {
@ -304,12 +295,6 @@ public abstract class AbstractMdRecordToOafMapper {
.getRelation(
entity.getId(), targetId, relType, subRelType, relClass, entity,
validationdDate));
rels
.add(
OafMapperUtils
.getRelation(
targetId, entity.getId(), relType, subRelType, relClassInverse, entity,
validationdDate));
}
}
}
@ -457,7 +442,7 @@ public abstract class AbstractMdRecordToOafMapper {
protected abstract String prepareDatasetStorageDate(Document doc);
private Journal prepareJournal(final Document doc, final DataInfo info) {
private Journal prepareJournal(final Document doc) {
final Node n = doc.selectSingleNode("//oaf:journal");
if (n != null) {
final String name = n.getText();
@ -470,7 +455,7 @@ public abstract class AbstractMdRecordToOafMapper {
final String vol = n.valueOf("@vol");
final String edition = n.valueOf("@edition");
if (StringUtils.isNotBlank(name)) {
return journal(name, issnPrinted, issnOnline, issnLinking, ep, iss, sp, vol, edition, null, null, info);
return journal(name, issnPrinted, issnOnline, issnLinking, ep, iss, sp, vol, edition, null, null);
}
}
return null;

View File

@ -28,7 +28,6 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.HdfsSupport;
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
import scala.Tuple2;

View File

@ -3,6 +3,7 @@ package eu.dnetlib.dhp.oa.graph.raw;
import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.*;
import static eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory.*;
import java.io.Closeable;
import java.io.IOException;
@ -253,7 +254,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
.setJournal(
journal(
rs.getString("officialname"), rs.getString("issnPrinted"), rs.getString("issnOnline"),
rs.getString("issnLinking"), info)); // Journal
rs.getString("issnLinking"))); // Journal
ds.setResearchentitytypes(listValues(rs.getArray("researchentitytypes")));
ds.setJurisdiction(prepareQualifierSplitting(rs.getString("jurisdiction")));
@ -402,16 +403,9 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
createOpenaireId(10, rs.getString("collectedfromid"), true), rs.getString("collectedfromname"));
final List<Provenance> provenance = getProvenance(collectedFrom, info);
final Relation r1 = OafMapperUtils
return Arrays.asList(OafMapperUtils
.getRelation(
dsId, orgId, DATASOURCE_ORGANIZATION, PROVISION, IS_PROVIDED_BY, provenance);
final Relation r2 = OafMapperUtils
.getRelation(
orgId, dsId, DATASOURCE_ORGANIZATION, PROVISION, PROVIDES, provenance);
return Arrays.asList(r1, r2);
orgId, dsId, DATASOURCE_ORGANIZATION, PROVISION, PROVIDES, provenance));
} catch (final Exception e) {
throw new RuntimeException(e);
}
@ -432,15 +426,10 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
keyValue("contribution", String.valueOf(rs.getDouble("contribution"))),
keyValue("currency", rs.getString("currency")));
final Relation r1 = OafMapperUtils
.getRelation(
projectId, orgId, PROJECT_ORGANIZATION, PARTICIPATION, HAS_PARTICIPANT, provenance, properties);
return Arrays.asList(
OafMapperUtils.getRelation(
orgId, projectId, PROJECT_ORGANIZATION, PARTICIPATION, IS_PARTICIPANT, provenance, properties));
final Relation r2 = OafMapperUtils
.getRelation(
orgId, projectId, PROJECT_ORGANIZATION, PARTICIPATION, IS_PARTICIPANT, provenance, properties);
return Arrays.asList(r1, r2);
} catch (final Exception e) {
throw new RuntimeException(e);
}
@ -479,15 +468,13 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
final String sourceId = createOpenaireId(sourceType, rs.getString("source_id"), false);
final String targetId = createOpenaireId(targetType, rs.getString("target_id"), false);
Relation r1 = prepareRelation(sourceId, targetId, PROVENANCE_CLAIM, validationDate);
Relation r2 = prepareRelation(targetId, sourceId, PROVENANCE_CLAIM, validationDate);
Relation rel = prepareRelation(sourceId, targetId, PROVENANCE_CLAIM, validationDate);
final String semantics = rs.getString("semantics");
switch (semantics) {
case "resultResult_relationship_isRelatedTo":
r1 = setRelationSemantic(r1, RESULT_RESULT, RELATIONSHIP, IS_RELATED_TO);
r2 = setRelationSemantic(r2, RESULT_RESULT, RELATIONSHIP, IS_RELATED_TO);
rel = setRelationSemantic(rel, RESULT_RESULT, RELATIONSHIP, IS_RELATED_TO);
break;
case "resultProject_outcome_produces":
if (!"project".equals(sourceType)) {
@ -497,18 +484,16 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
"invalid claim, sourceId: %s, targetId: %s, semantics: %s", sourceId, targetId,
semantics));
}
r1 = setRelationSemantic(r1, RESULT_PROJECT, OUTCOME, PRODUCES);
r2 = setRelationSemantic(r2, RESULT_PROJECT, OUTCOME, IS_PRODUCED_BY);
rel = setRelationSemantic(rel, RESULT_PROJECT, OUTCOME, PRODUCES);
break;
case "resultResult_publicationDataset_isRelatedTo":
r1 = setRelationSemantic(r1, RESULT_RESULT, PUBLICATION_DATASET, IS_RELATED_TO);
r2 = setRelationSemantic(r2, RESULT_RESULT, PUBLICATION_DATASET, IS_RELATED_TO);
rel = setRelationSemantic(rel, RESULT_RESULT, PUBLICATION_DATASET, IS_RELATED_TO);
break;
default:
throw new IllegalArgumentException("claim semantics not managed: " + semantics);
}
return Arrays.asList(r1, r2);
return Arrays.asList(rel);
}
} catch (final Exception e) {
throw new RuntimeException(e);
@ -656,11 +641,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
createOpenaireId(10, rs.getString("collectedfromid"), true), rs.getString("collectedfromname"));
final List<Provenance> provenance = getProvenance(collectedFrom, info);
final Relation r1 = getRelation(orgId1, orgId2, ORG_ORG_RELTYPE, DEDUP, MERGES, provenance);
final Relation r2 = getRelation(orgId2, orgId1, ORG_ORG_RELTYPE, DEDUP, IS_MERGED_IN, provenance);
return Arrays.asList(r1, r2);
return Arrays.asList(getRelation(orgId1, orgId2, ORG_ORG_RELTYPE, DEDUP, MERGES, provenance));
} catch (final Exception e) {
throw new RuntimeException(e);
}

View File

@ -3,8 +3,8 @@ package eu.dnetlib.dhp.oa.graph.raw;
import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.*;
import static eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory.*;
import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.util.ArrayList;
import java.util.HashSet;
@ -273,17 +273,11 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper {
final String originalId = ((Node) o).getText();
if (StringUtils.isNotBlank(originalId)) {
final String otherId = createOpenaireId(50, originalId, false);
res
.add(
getRelation(
docId, otherId, RESULT_RESULT, RELATIONSHIP, IS_RELATED_TO, entity));
res
.add(
getRelation(
otherId, docId, RESULT_RESULT, RELATIONSHIP, IS_RELATED_TO, entity));
}
}
return res;

View File

@ -4,6 +4,7 @@ package eu.dnetlib.dhp.oa.graph.raw;
import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.*;
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.structuredProperty;
import static eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory.*;
import java.net.URLDecoder;
import java.util.*;
@ -407,11 +408,6 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
.add(
getRelation(
entityId, otherId, rel.getRelType(), rel.getSubReltype(), rel.getRelClass(), entity));
res
.add(
getRelation(
otherId, entityId, rel.getRelType(), rel.getSubReltype(), rel.getInverseRelClass(), entity));
}
return res;
}

View File

@ -72,7 +72,7 @@ class GenerateEntitiesApplicationTest {
protected <T extends Result> void verifyMerge(Result publication, Result dataset, Class<T> clazz,
String resultType) {
final Result merge = MergeUtils.mergeResults(publication, dataset);
final Result merge = MergeUtils.mergeResult(publication, dataset);
assertTrue(clazz.isAssignableFrom(merge.getClass()));
assertEquals(resultType, merge.getResulttype());
}

View File

@ -257,44 +257,27 @@ class MigrateDbEntitiesApplicationTest {
void testProcessProjectOrganization() throws Exception {
final List<TypedField> fields = prepareMocks("projectorganization_resultset_entry.json");
final List<Oaf> list = app.processProjectOrganization(rs);
final List<Oaf> oaf = app.processProjectOrganization(rs);
assertEquals(2, list.size());
assertNotNull(oaf);
assertFalse(oaf.isEmpty());
assertEquals(1, oaf.size());
verifyMocks(fields);
final Relation r1 = (Relation) list.get(0);
final Relation r2 = (Relation) list.get(1);
assertValidId(r1.getSource());
assertValidId(r2.getSource());
assertEquals(r1.getSource(), r2.getTarget());
assertEquals(r2.getSource(), r1.getTarget());
assertNotNull(r1.getProvenance());
assertFalse(r1.getProvenance().isEmpty());
assertValidId(r1.getProvenance().get(0).getCollectedfrom().getKey());
assertNotNull(r2.getProvenance());
assertFalse(r2.getProvenance().isEmpty());
assertValidId(r2.getProvenance().get(0).getCollectedfrom().getKey());
final Relation rel = (Relation) oaf.get(0);
assertEquals(ModelConstants.PROJECT_ORGANIZATION, r1.getRelType());
assertEquals(ModelConstants.PROJECT_ORGANIZATION, r2.getRelType());
assertValidId(rel.getSource());
assertNotNull(rel.getProvenance());
assertFalse(rel.getProvenance().isEmpty());
assertValidId(rel.getProvenance().get(0).getCollectedfrom().getKey());
assertEquals(ModelConstants.PARTICIPATION, r1.getSubRelType());
assertEquals(ModelConstants.PARTICIPATION, r2.getSubRelType());
assertEquals(ModelConstants.PROJECT_ORGANIZATION, rel.getRelType());
assertEquals(ModelConstants.PARTICIPATION, rel.getSubRelType());
assertEquals(ModelConstants.IS_PARTICIPANT, rel.getRelClass());
if (r1.getSource().startsWith("40")) {
assertEquals(ModelConstants.HAS_PARTICIPANT, r1.getRelClass());
assertEquals(ModelConstants.IS_PARTICIPANT, r2.getRelClass());
} else if (r1.getSource().startsWith("20")) {
assertEquals(ModelConstants.IS_PARTICIPANT, r1.getRelClass());
assertEquals(ModelConstants.HAS_PARTICIPANT, r2.getRelClass());
}
assertNotNull(r1.getProperties());
checkProperty(r1, "contribution", "436754.0");
checkProperty(r2, "contribution", "436754.0");
checkProperty(r1, "currency", "EUR");
checkProperty(r2, "currency", "EUR");
assertNotNull(rel.getProperties());
checkProperty(rel, "contribution", "436754.0");
checkProperty(rel, "currency", "EUR");
}
private void checkProperty(Relation r, String property, String value) {

View File

@ -3,7 +3,7 @@
<parent>
<artifactId>dhp-workflows</artifactId>
<groupId>eu.dnetlib.dhp</groupId>
<version>1.2.5-SNAPSHOT</version>
<version>2.0.0-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>

View File

@ -3,7 +3,7 @@
<parent>
<artifactId>dhp-workflows</artifactId>
<groupId>eu.dnetlib.dhp</groupId>
<version>1.2.5-SNAPSHOT</version>
<version>2.0.0-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>
<artifactId>dhp-stats-promote</artifactId>

View File

@ -3,7 +3,7 @@
<parent>
<artifactId>dhp-workflows</artifactId>
<groupId>eu.dnetlib.dhp</groupId>
<version>1.2.5-SNAPSHOT</version>
<version>2.0.0-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>
<artifactId>dhp-stats-update</artifactId>

View File

@ -3,7 +3,7 @@
<parent>
<artifactId>dhp-workflows</artifactId>
<groupId>eu.dnetlib.dhp</groupId>
<version>1.2.5-SNAPSHOT</version>
<version>2.0.0-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>
<artifactId>dhp-usage-raw-data-update</artifactId>

View File

@ -3,7 +3,7 @@
<parent>
<artifactId>dhp-workflows</artifactId>
<groupId>eu.dnetlib.dhp</groupId>
<version>1.2.5-SNAPSHOT</version>
<version>2.0.0-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>
<artifactId>dhp-usage-stats-build</artifactId>

View File

@ -3,7 +3,7 @@
<parent>
<artifactId>dhp-workflows</artifactId>
<groupId>eu.dnetlib.dhp</groupId>
<version>1.2.5-SNAPSHOT</version>
<version>2.0.0-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>

View File

@ -6,7 +6,7 @@
<parent>
<groupId>eu.dnetlib.dhp</groupId>
<artifactId>dhp</artifactId>
<version>1.2.5-SNAPSHOT</version>
<version>2.0.0-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>

View File

@ -3,7 +3,7 @@
<modelVersion>4.0.0</modelVersion>
<groupId>eu.dnetlib.dhp</groupId>
<artifactId>dhp</artifactId>
<version>1.2.5-SNAPSHOT</version>
<version>2.0.0-SNAPSHOT</version>
<packaging>pom</packaging>
<licenses>