WIP: using common definitions from ModelConstants

This commit is contained in:
Claudio Atzori 2021-03-31 17:07:13 +02:00
parent 27681b876c
commit 72ce741ea6
18 changed files with 219 additions and 197 deletions

View File

@ -174,7 +174,9 @@ public class CleaningFunctions {
if (Objects.isNull(i.getAccessright()) || StringUtils.isBlank(i.getAccessright().getClassid())) {
i
.setAccessright(
accessRight(ModelConstants.UNKNOWN, "not available", ModelConstants.DNET_ACCESS_MODES));
accessRight(
ModelConstants.UNKNOWN, ModelConstants.NOT_AVAILABLE,
ModelConstants.DNET_ACCESS_MODES));
}
if (Objects.isNull(i.getHostedby()) || StringUtils.isBlank(i.getHostedby().getKey())) {
i.setHostedby(ModelConstants.UNKNOWN_REPOSITORY);
@ -189,7 +191,9 @@ public class CleaningFunctions {
if (Objects.isNull(bestaccessrights)) {
r
.setBestaccessright(
qualifier("UNKNOWN", "not available", ModelConstants.DNET_ACCESS_MODES));
qualifier(
ModelConstants.UNKNOWN, ModelConstants.NOT_AVAILABLE,
ModelConstants.DNET_ACCESS_MODES));
} else {
r.setBestaccessright(bestaccessrights);
}

View File

@ -18,6 +18,8 @@ public class ModelConstants {
public static final String PUBMED_CENTRAL_ID = "10|opendoar____::eda80a3d5b344bc40f3bc04f65b7a357";
public static final String ARXIV_ID = "10|opendoar____::6f4922f45568161a8cdf4ad2299f6d23";
public static final String OPENORGS_NAME = "OpenOrgs Database";
// VOCABULARY VALUE
public static final String ACCESS_RIGHT_OPEN = "OPEN";
@ -55,12 +57,12 @@ public class ModelConstants {
public static final String IS_SUPPLEMENTED_BY = "isSupplementedBy";
public static final String PART = "part";
public static final String IS_PART_OF = "isPartOf";
public static final String HAS_PARTS = "hasParts";
public static final String HAS_PART = "hasPart";
public static final String RELATIONSHIP = "relationship";
public static final String CITATION = "citation";
public static final String CITES = "cites";
public static final String IS_CITED_BY = "isCitedBy";
public static final String REVIEW = "review";
public static final String REVIEW = "review"; // subreltype
public static final String REVIEWS = "reviews";
public static final String IS_REVIEWED_BY = "isReviewedBy";
@ -84,7 +86,16 @@ public class ModelConstants {
public static final String IS_AUTHOR_INSTITUTION_OF = "isAuthorInstitutionOf";
public static final String HAS_AUTHOR_INSTITUTION = "hasAuthorInstitution";
public static final String ORG_ORG_RELTYPE = "organizationOrganization";
public static final String DEDUP = "dedup";
public static final String MERGES = "merges";
public static final String IS_MERGED_IN = "isMergedIn";
public static final String SIMILARITY = "similarity";
public static final String IS_SIMILAR_TO = "isSimilarTo";
public static final String IS_DIFFERENT_FROM = "isDifferentFrom";
public static final String UNKNOWN = "UNKNOWN";
public static final String NOT_AVAILABLE = "not available";

View File

@ -116,45 +116,45 @@ public class ModelSupport {
relationInverseMap
.put(
"projectOrganization_participation_isParticipant", new RelationInverse()
.setRelation("isParticipant")
.setInverse("hasParticipant")
.setRelType("projectOrganization")
.setSubReltype("participation"));
.setRelation(ModelConstants.IS_PARTICIPANT)
.setInverse(ModelConstants.HAS_PARTICIPANT)
.setRelType(ModelConstants.PROJECT_ORGANIZATION)
.setSubReltype(ModelConstants.PARTICIPATION));
relationInverseMap
.put(
"projectOrganization_participation_hasParticipant", new RelationInverse()
.setInverse("isParticipant")
.setRelation("hasParticipant")
.setRelType("projectOrganization")
.setSubReltype("participation"));
.setInverse(ModelConstants.IS_PARTICIPANT)
.setRelation(ModelConstants.HAS_PARTICIPANT)
.setRelType(ModelConstants.PROJECT_ORGANIZATION)
.setSubReltype(ModelConstants.PARTICIPATION));
relationInverseMap
.put(
"resultOrganization_affiliation_hasAuthorInstitution", new RelationInverse()
.setRelation("hasAuthorInstitution")
.setInverse("isAuthorInstitutionOf")
.setRelType("resultOrganization")
.setSubReltype("affiliation"));
.setRelation(ModelConstants.HAS_AUTHOR_INSTITUTION)
.setInverse(ModelConstants.IS_AUTHOR_INSTITUTION_OF)
.setRelType(ModelConstants.RESULT_ORGANIZATION)
.setSubReltype(ModelConstants.AFFILIATION));
relationInverseMap
.put(
"resultOrganization_affiliation_isAuthorInstitutionOf", new RelationInverse()
.setInverse("hasAuthorInstitution")
.setRelation("isAuthorInstitutionOf")
.setRelType("resultOrganization")
.setSubReltype("affiliation"));
.setInverse(ModelConstants.HAS_AUTHOR_INSTITUTION)
.setRelation(ModelConstants.IS_AUTHOR_INSTITUTION_OF)
.setRelType(ModelConstants.RESULT_ORGANIZATION)
.setSubReltype(ModelConstants.AFFILIATION));
relationInverseMap
.put(
"organizationOrganization_dedup_merges", new RelationInverse()
.setRelation("merges")
.setInverse("isMergedIn")
.setRelType("organizationOrganization")
.setSubReltype("dedup"));
.setRelation(ModelConstants.MERGES)
.setInverse(ModelConstants.IS_MERGED_IN)
.setRelType(ModelConstants.ORG_ORG_RELTYPE)
.setSubReltype(ModelConstants.DEDUP));
relationInverseMap
.put(
"organizationOrganization_dedup_isMergedIn", new RelationInverse()
.setInverse("merges")
.setRelation("isMergedIn")
.setRelType("organizationOrganization")
.setSubReltype("dedup"));
.setInverse(ModelConstants.MERGES)
.setRelation(ModelConstants.IS_MERGED_IN)
.setRelType(ModelConstants.ORG_ORG_RELTYPE)
.setSubReltype(ModelConstants.DEDUP));
relationInverseMap
.put(
"organizationOrganization_dedupSimilarity_isSimilarTo", new RelationInverse()
@ -166,17 +166,17 @@ public class ModelSupport {
relationInverseMap
.put(
"resultProject_outcome_isProducedBy", new RelationInverse()
.setRelation("isProducedBy")
.setInverse("produces")
.setRelType("resultProject")
.setSubReltype("outcome"));
.setRelation(ModelConstants.IS_PRODUCED_BY)
.setInverse(ModelConstants.PRODUCES)
.setRelType(ModelConstants.RESULT_PROJECT)
.setSubReltype(ModelConstants.OUTCOME));
relationInverseMap
.put(
"resultProject_outcome_produces", new RelationInverse()
.setInverse("isProducedBy")
.setRelation("produces")
.setRelType("resultProject")
.setSubReltype("outcome"));
.setInverse(ModelConstants.IS_PRODUCED_BY)
.setRelation(ModelConstants.PRODUCES)
.setRelType(ModelConstants.RESULT_PROJECT)
.setSubReltype(ModelConstants.OUTCOME));
relationInverseMap
.put(
"projectPerson_contactPerson_isContact", new RelationInverse()
@ -201,17 +201,17 @@ public class ModelSupport {
relationInverseMap
.put(
"personPerson_dedup_merges", new RelationInverse()
.setInverse("isMergedIn")
.setRelation("merges")
.setInverse(ModelConstants.IS_MERGED_IN)
.setRelation(ModelConstants.MERGES)
.setRelType("personPerson")
.setSubReltype("dedup"));
.setSubReltype(ModelConstants.DEDUP));
relationInverseMap
.put(
"personPerson_dedup_isMergedIn", new RelationInverse()
.setInverse("merges")
.setRelation("isMergedIn")
.setInverse(ModelConstants.MERGES)
.setRelation(ModelConstants.IS_MERGED_IN)
.setRelType("personPerson")
.setSubReltype("dedup"));
.setSubReltype(ModelConstants.DEDUP));
relationInverseMap
.put(
"personPerson_dedupSimilarity_isSimilarTo", new RelationInverse()
@ -222,86 +222,86 @@ public class ModelSupport {
relationInverseMap
.put(
"datasourceOrganization_provision_isProvidedBy", new RelationInverse()
.setInverse("provides")
.setRelation("isProvidedBy")
.setRelType("datasourceOrganization")
.setSubReltype("provision"));
.setInverse(ModelConstants.PROVIDES)
.setRelation(ModelConstants.IS_PROVIDED_BY)
.setRelType(ModelConstants.DATASOURCE_ORGANIZATION)
.setSubReltype(ModelConstants.PROVISION));
relationInverseMap
.put(
"datasourceOrganization_provision_provides", new RelationInverse()
.setInverse("isProvidedBy")
.setRelation("provides")
.setRelType("datasourceOrganization")
.setSubReltype("provision"));
.setInverse(ModelConstants.IS_PROVIDED_BY)
.setRelation(ModelConstants.PROVIDES)
.setRelType(ModelConstants.DATASOURCE_ORGANIZATION)
.setSubReltype(ModelConstants.PROVISION));
relationInverseMap
.put(
"resultResult_similarity_hasAmongTopNSimilarDocuments", new RelationInverse()
.setInverse("isAmongTopNSimilarDocuments")
.setRelation("hasAmongTopNSimilarDocuments")
.setRelType("resultResult")
.setRelType(ModelConstants.RESULT_RESULT)
.setSubReltype("similarity"));
relationInverseMap
.put(
"resultResult_similarity_isAmongTopNSimilarDocuments", new RelationInverse()
.setInverse("hasAmongTopNSimilarDocuments")
.setRelation("isAmongTopNSimilarDocuments")
.setRelType("resultResult")
.setRelType(ModelConstants.RESULT_RESULT)
.setSubReltype("similarity"));
relationInverseMap
.put(
"resultResult_relationship_isRelatedTo", new RelationInverse()
.setInverse("isRelatedTo")
.setRelation("isRelatedTo")
.setRelType("resultResult")
.setSubReltype("relationship"));
.setInverse(ModelConstants.IS_RELATED_TO)
.setRelation(ModelConstants.IS_RELATED_TO)
.setRelType(ModelConstants.RESULT_RESULT)
.setSubReltype(ModelConstants.RELATIONSHIP));
relationInverseMap
.put(
"resultResult_supplement_isSupplementTo", new RelationInverse()
.setInverse("isSupplementedBy")
.setRelation("isSupplementTo")
.setRelType("resultResult")
.setSubReltype("supplement"));
.setInverse(ModelConstants.IS_SUPPLEMENTED_BY)
.setRelation(ModelConstants.IS_SUPPLEMENT_TO)
.setRelType(ModelConstants.RESULT_RESULT)
.setSubReltype(ModelConstants.SUPPLEMENT));
relationInverseMap
.put(
"resultResult_supplement_isSupplementedBy", new RelationInverse()
.setInverse("isSupplementTo")
.setRelation("isSupplementedBy")
.setRelType("resultResult")
.setSubReltype("supplement"));
.setInverse(ModelConstants.IS_SUPPLEMENT_TO)
.setRelation(ModelConstants.IS_SUPPLEMENTED_BY)
.setRelType(ModelConstants.RESULT_RESULT)
.setSubReltype(ModelConstants.SUPPLEMENT));
relationInverseMap
.put(
"resultResult_part_isPartOf", new RelationInverse()
.setInverse("hasPart")
.setRelation("isPartOf")
.setRelType("resultResult")
.setSubReltype("part"));
.setInverse(ModelConstants.HAS_PART)
.setRelation(ModelConstants.IS_PART_OF)
.setRelType(ModelConstants.RESULT_RESULT)
.setSubReltype(ModelConstants.PART));
relationInverseMap
.put(
"resultResult_part_hasPart", new RelationInverse()
.setInverse("isPartOf")
.setRelation("hasPart")
.setRelType("resultResult")
.setSubReltype("part"));
.setInverse(ModelConstants.IS_PART_OF)
.setRelation(ModelConstants.HAS_PART)
.setRelType(ModelConstants.RESULT_RESULT)
.setSubReltype(ModelConstants.PART));
relationInverseMap
.put(
"resultResult_dedup_merges", new RelationInverse()
.setInverse("isMergedIn")
.setRelation("merges")
.setRelType("resultResult")
.setSubReltype("dedup"));
.setInverse(ModelConstants.IS_MERGED_IN)
.setRelation(ModelConstants.MERGES)
.setRelType(ModelConstants.RESULT_RESULT)
.setSubReltype(ModelConstants.DEDUP));
relationInverseMap
.put(
"resultResult_dedup_isMergedIn", new RelationInverse()
.setInverse("merges")
.setRelation("isMergedIn")
.setRelType("resultResult")
.setSubReltype("dedup"));
.setInverse(ModelConstants.MERGES)
.setRelation(ModelConstants.IS_MERGED_IN)
.setRelType(ModelConstants.RESULT_RESULT)
.setSubReltype(ModelConstants.DEDUP));
relationInverseMap
.put(
"resultResult_dedupSimilarity_isSimilarTo", new RelationInverse()
.setInverse("isSimilarTo")
.setRelation("isSimilarTo")
.setRelType("resultResult")
.setRelType(ModelConstants.RESULT_RESULT)
.setSubReltype("dedupSimilarity"));
}

View File

@ -10,6 +10,7 @@ import org.junit.jupiter.api.Test;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.Relation;
/** @author claudio.atzori */
@ -21,9 +22,9 @@ public class AtomicActionTest {
Relation rel = new Relation();
rel.setSource("1");
rel.setTarget("2");
rel.setRelType("resultResult");
rel.setSubRelType("dedup");
rel.setRelClass("merges");
rel.setRelType(ModelConstants.RESULT_RESULT);
rel.setSubRelType(ModelConstants.DEDUP);
rel.setRelClass(ModelConstants.MERGES);
AtomicAction aa1 = new AtomicAction(Relation.class, rel);

View File

@ -3,6 +3,7 @@ package eu.dnetlib.dhp.actionmanager.datacite
import com.fasterxml.jackson.databind.ObjectMapper
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup
import eu.dnetlib.dhp.schema.action.AtomicAction
import eu.dnetlib.dhp.schema.common.ModelConstants
import eu.dnetlib.dhp.schema.oaf.{Author, DataInfo, Instance, KeyValue, Oaf, OafMapperUtils, OtherResearchProduct, Publication, Qualifier, Relation, Result, Software, StructuredProperty, Dataset => OafDataset}
import eu.dnetlib.dhp.utils.DHPUtils
import org.apache.commons.lang3.StringUtils
@ -45,11 +46,6 @@ object DataciteToOAFTransformation {
codec.onMalformedInput(CodingErrorAction.REPLACE)
codec.onUnmappableCharacter(CodingErrorAction.REPLACE)
private val PID_VOCABULARY = "dnet:pid_types"
val COBJ_VOCABULARY = "dnet:publication_resource"
val RESULT_VOCABULARY = "dnet:result_typologies"
val ACCESS_MODE_VOCABULARY = "dnet:access_modes"
val DOI_CLASS = "doi"
@ -174,20 +170,20 @@ object DataciteToOAFTransformation {
def getTypeQualifier(resourceType: String, resourceTypeGeneral: String, schemaOrg: String, vocabularies:VocabularyGroup): (Qualifier, Qualifier) = {
if (resourceType != null && resourceType.nonEmpty) {
val typeQualifier = vocabularies.getSynonymAsQualifier(COBJ_VOCABULARY, resourceType)
val typeQualifier = vocabularies.getSynonymAsQualifier(ModelConstants.DNET_PUBLICATION_RESOURCE, resourceType)
if (typeQualifier != null)
return (typeQualifier, vocabularies.getSynonymAsQualifier(RESULT_VOCABULARY, typeQualifier.getClassid))
return (typeQualifier, vocabularies.getSynonymAsQualifier(ModelConstants.DNET_RESULT_TYPOLOGIES, typeQualifier.getClassid))
}
if (schemaOrg != null && schemaOrg.nonEmpty) {
val typeQualifier = vocabularies.getSynonymAsQualifier(COBJ_VOCABULARY, schemaOrg)
val typeQualifier = vocabularies.getSynonymAsQualifier(ModelConstants.DNET_PUBLICATION_RESOURCE, schemaOrg)
if (typeQualifier != null)
return (typeQualifier, vocabularies.getSynonymAsQualifier(RESULT_VOCABULARY, typeQualifier.getClassid))
return (typeQualifier, vocabularies.getSynonymAsQualifier(ModelConstants.DNET_RESULT_TYPOLOGIES, typeQualifier.getClassid))
}
if (resourceTypeGeneral != null && resourceTypeGeneral.nonEmpty) {
val typeQualifier = vocabularies.getSynonymAsQualifier(COBJ_VOCABULARY, resourceTypeGeneral)
val typeQualifier = vocabularies.getSynonymAsQualifier(ModelConstants.DNET_PUBLICATION_RESOURCE, resourceTypeGeneral)
if (typeQualifier != null)
return (typeQualifier, vocabularies.getSynonymAsQualifier(RESULT_VOCABULARY, typeQualifier.getClassid))
return (typeQualifier, vocabularies.getSynonymAsQualifier(ModelConstants.DNET_RESULT_TYPOLOGIES, typeQualifier.getClassid))
}
null
@ -295,7 +291,7 @@ object DataciteToOAFTransformation {
return List()
val doi_q = vocabularies.getSynonymAsQualifier(PID_VOCABULARY, "doi")
val doi_q = vocabularies.getSynonymAsQualifier(ModelConstants.DNET_PID_TYPES, "doi")
val pid = OafMapperUtils.structuredProperty(doi, doi_q, dataInfo)
result.setPid(List(pid).asJava)
result.setId(OafMapperUtils.createOpenaireId(50, s"datacite____::$doi", true))
@ -319,7 +315,7 @@ object DataciteToOAFTransformation {
a.setSurname(c.familyName.orNull)
if (c.nameIdentifiers!= null&& c.nameIdentifiers.isDefined && c.nameIdentifiers.get != null) {
a.setPid(c.nameIdentifiers.get.map(ni => {
val q = if (ni.nameIdentifierScheme.isDefined) vocabularies.getTermAsQualifier(PID_VOCABULARY, ni.nameIdentifierScheme.get.toLowerCase()) else null
val q = if (ni.nameIdentifierScheme.isDefined) vocabularies.getTermAsQualifier(ModelConstants.DNET_PID_TYPES, ni.nameIdentifierScheme.get.toLowerCase()) else null
if (ni.nameIdentifier!= null && ni.nameIdentifier.isDefined) {
OafMapperUtils.structuredProperty(ni.nameIdentifier.get, q, dataInfo)
}
@ -427,11 +423,11 @@ object DataciteToOAFTransformation {
} yield rightsUri
val aRights: Option[Qualifier] = accessRights.map(r => {
vocabularies.getSynonymAsQualifier(ACCESS_MODE_VOCABULARY, r)
vocabularies.getSynonymAsQualifier(ModelConstants.DNET_ACCESS_MODES, r)
}).find(q => q != null)
val access_rights_qualifier = if (aRights.isDefined) aRights.get else OafMapperUtils.qualifier("UNKNOWN", "not available", ACCESS_MODE_VOCABULARY, ACCESS_MODE_VOCABULARY)
val access_rights_qualifier = if (aRights.isDefined) aRights.get else OafMapperUtils.qualifier(ModelConstants.UNKNOWN, ModelConstants.NOT_AVAILABLE, ModelConstants.DNET_ACCESS_MODES, ModelConstants.DNET_ACCESS_MODES)
if (client.isDefined) {
val hb = hostedByMap.getOrElse(client.get.toUpperCase(), unknown_repository)

View File

@ -18,7 +18,10 @@ import org.dom4j.DocumentException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.sun.media.sound.ModelChannelMixer;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.DataInfo;
import eu.dnetlib.dhp.schema.oaf.KeyValue;
import eu.dnetlib.dhp.schema.oaf.Qualifier;
@ -135,8 +138,9 @@ public class SparkCopyOpenorgsMergeRels extends AbstractSparkAction {
/**
 * Tells whether the given relation is a dedup similarity relation between two organizations.
 *
 * @param rel the relation to test; its relClass, relType and subRelType are dereferenced
 *            without null checks
 * @return true when relClass is "isSimilarTo", relType is "organizationOrganization" and
 *         subRelType is "dedup"; false otherwise
 */
private boolean filterOpenorgsRels(Relation rel) {
// Check written with string literals.
if (rel.getRelClass().equals("isSimilarTo") && rel.getRelType().equals("organizationOrganization")
&& rel.getSubRelType().equals("dedup"))
// Same check written with the shared ModelConstants definitions
// (IS_SIMILAR_TO = "isSimilarTo", ORG_ORG_RELTYPE = "organizationOrganization", DEDUP = "dedup").
if (rel.getRelClass().equals(ModelConstants.IS_SIMILAR_TO)
&& rel.getRelType().equals(ModelConstants.ORG_ORG_RELTYPE)
&& rel.getSubRelType().equals(ModelConstants.DEDUP))
return true;
return false;
}
@ -145,7 +149,7 @@ public class SparkCopyOpenorgsMergeRels extends AbstractSparkAction {
if (rel.getCollectedfrom() != null) {
for (KeyValue k : rel.getCollectedfrom()) {
if (k.getValue() != null && k.getValue().equals("OpenOrgs Database")) {
if (k.getValue() != null && k.getValue().equals(ModelConstants.OPENORGS_NAME)) {
return true;
}
}
@ -162,7 +166,7 @@ public class SparkCopyOpenorgsMergeRels extends AbstractSparkAction {
r.setTarget(target);
r.setRelClass(relClass);
r.setRelType(entityType + entityType.substring(0, 1).toUpperCase() + entityType.substring(1));
r.setSubRelType("dedup");
r.setSubRelType(ModelConstants.DEDUP);
DataInfo info = new DataInfo();
info.setDeletedbyinference(false);

View File

@ -2,17 +2,11 @@
package eu.dnetlib.dhp.oa.dedup;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.function.ForeachFunction;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.graphx.Edge;
import org.apache.spark.rdd.RDD;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode;
@ -22,14 +16,13 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.DataInfo;
import eu.dnetlib.dhp.schema.oaf.KeyValue;
import eu.dnetlib.dhp.schema.oaf.Qualifier;
import eu.dnetlib.dhp.schema.oaf.Relation;
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
import eu.dnetlib.pace.config.DedupConfig;
// Copies the (verified) simrels from the relations to the workdir, so that they are available for the deduplication.
public class SparkCopyOpenorgsSimRels extends AbstractSparkAction {
@ -100,8 +93,9 @@ public class SparkCopyOpenorgsSimRels extends AbstractSparkAction {
/**
 * Tells whether the given relation is a dedup similarity relation between two organizations
 * that also satisfies {@code isOpenorgs(rel)}.
 *
 * @param rel the relation to test; its relClass, relType and subRelType are dereferenced
 *            without null checks
 * @return true when relClass is "isSimilarTo", relType is "organizationOrganization",
 *         subRelType is "dedup" and {@code isOpenorgs(rel)} returns true; false otherwise
 */
private boolean filterOpenorgsRels(Relation rel) {
// Check written with string literals.
if (rel.getRelClass().equals("isSimilarTo") && rel.getRelType().equals("organizationOrganization")
&& rel.getSubRelType().equals("dedup") && isOpenorgs(rel))
// Same check written with the shared ModelConstants definitions
// (IS_SIMILAR_TO = "isSimilarTo", ORG_ORG_RELTYPE = "organizationOrganization", DEDUP = "dedup").
if (rel.getRelClass().equals(ModelConstants.IS_SIMILAR_TO)
&& rel.getRelType().equals(ModelConstants.ORG_ORG_RELTYPE)
&& rel.getSubRelType().equals(ModelConstants.DEDUP) && isOpenorgs(rel))
return true;
return false;
}
@ -110,7 +104,7 @@ public class SparkCopyOpenorgsSimRels extends AbstractSparkAction {
if (rel.getCollectedfrom() != null) {
for (KeyValue k : rel.getCollectedfrom()) {
if (k.getValue() != null && k.getValue().equals("OpenOrgs Database")) {
if (k.getValue() != null && k.getValue().equals(ModelConstants.OPENORGS_NAME)) {
return true;
}
}

View File

@ -28,6 +28,7 @@ import com.google.common.hash.Hashing;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.dedup.graph.ConnectedComponent;
import eu.dnetlib.dhp.oa.dedup.graph.GraphProcessor;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.DataInfo;
import eu.dnetlib.dhp.schema.oaf.Qualifier;
@ -135,8 +136,8 @@ public class SparkCreateMergeRels extends AbstractSparkAction {
id -> {
List<Relation> tmp = new ArrayList<>();
tmp.add(rel(cc.getCcId(), id, "merges", dedupConf));
tmp.add(rel(id, cc.getCcId(), "isMergedIn", dedupConf));
tmp.add(rel(cc.getCcId(), id, ModelConstants.MERGES, dedupConf));
tmp.add(rel(id, cc.getCcId(), ModelConstants.IS_MERGED_IN, dedupConf));
return tmp.stream();
})
@ -152,7 +153,7 @@ public class SparkCreateMergeRels extends AbstractSparkAction {
r.setTarget(target);
r.setRelClass(relClass);
r.setRelType(entityType + entityType.substring(0, 1).toUpperCase() + entityType.substring(1));
r.setSubRelType("dedup");
r.setSubRelType(ModelConstants.DEDUP);
DataInfo info = new DataInfo();
info.setDeletedbyinference(false);

View File

@ -23,6 +23,7 @@ import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.dedup.model.OrgSimRel;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.DataInfo;
import eu.dnetlib.dhp.schema.oaf.Organization;
@ -232,13 +233,15 @@ public class SparkPrepareNewOrgs extends AbstractSparkAction {
switch (entityType) {
case "result":
if (rel.getRelClass().equals("isDifferentFrom") && rel.getRelType().equals("resultResult")
&& rel.getSubRelType().equals("dedup"))
if (rel.getRelClass().equals(ModelConstants.IS_DIFFERENT_FROM)
&& rel.getRelType().equals(ModelConstants.RESULT_RESULT)
&& rel.getSubRelType().equals(ModelConstants.DEDUP))
return true;
break;
case "organization":
if (rel.getRelClass().equals("isDifferentFrom") && rel.getRelType().equals("organizationOrganization")
&& rel.getSubRelType().equals("dedup"))
if (rel.getRelClass().equals(ModelConstants.IS_DIFFERENT_FROM)
&& rel.getRelType().equals(ModelConstants.ORG_ORG_RELTYPE)
&& rel.getSubRelType().equals(ModelConstants.DEDUP))
return true;
break;
default:

View File

@ -22,6 +22,7 @@ import com.google.common.collect.Lists;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.dedup.model.OrgSimRel;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.DataInfo;
import eu.dnetlib.dhp.schema.oaf.Organization;
@ -34,6 +35,8 @@ import scala.Tuple3;
public class SparkPrepareOrgRels extends AbstractSparkAction {
private static final Logger log = LoggerFactory.getLogger(SparkPrepareOrgRels.class);
public static final String OPENORGS_ID_PREFIX = "openorgs____";
public static final String CORDA_ID_PREFIX = "corda";
public SparkPrepareOrgRels(ArgumentApplicationParser parser, SparkSession spark) {
super(parser, spark);
@ -105,13 +108,15 @@ public class SparkPrepareOrgRels extends AbstractSparkAction {
switch (entityType) {
case "result":
if (rel.getRelClass().equals("isDifferentFrom") && rel.getRelType().equals("resultResult")
&& rel.getSubRelType().equals("dedup"))
if (rel.getRelClass().equals(ModelConstants.IS_DIFFERENT_FROM)
&& rel.getRelType().equals(ModelConstants.RESULT_RESULT)
&& rel.getSubRelType().equals(ModelConstants.DEDUP))
return true;
break;
case "organization":
if (rel.getRelClass().equals("isDifferentFrom") && rel.getRelType().equals("organizationOrganization")
&& rel.getSubRelType().equals("dedup"))
if (rel.getRelClass().equals(ModelConstants.IS_DIFFERENT_FROM)
&& rel.getRelType().equals(ModelConstants.ORG_ORG_RELTYPE)
&& rel.getSubRelType().equals(ModelConstants.DEDUP))
return true;
break;
default:
@ -241,19 +246,19 @@ public class SparkPrepareOrgRels extends AbstractSparkAction {
}
public static int compareIds(String o1, String o2) {
if (o1.contains("openorgs____") && o2.contains("openorgs____"))
if (o1.contains(OPENORGS_ID_PREFIX) && o2.contains(OPENORGS_ID_PREFIX))
return o1.compareTo(o2);
if (o1.contains("corda") && o2.contains("corda"))
if (o1.contains(CORDA_ID_PREFIX) && o2.contains(CORDA_ID_PREFIX))
return o1.compareTo(o2);
if (o1.contains("openorgs____"))
if (o1.contains(OPENORGS_ID_PREFIX))
return -1;
if (o2.contains("openorgs____"))
if (o2.contains(OPENORGS_ID_PREFIX))
return 1;
if (o1.contains("corda"))
if (o1.contains(CORDA_ID_PREFIX))
return -1;
if (o2.contains("corda"))
if (o2.contains(CORDA_ID_PREFIX))
return 1;
return o1.compareTo(o2);
@ -296,7 +301,7 @@ public class SparkPrepareOrgRels extends AbstractSparkAction {
for (String id1 : g._2()) {
for (String id2 : g._2()) {
if (!id1.equals(id2))
if (id1.contains("openorgs____") && !id2.contains("openorgsmesh"))
if (id1.contains(OPENORGS_ID_PREFIX) && !id2.contains("openorgsmesh"))
rels.add(new Tuple2<>(id1, id2));
}
}

View File

@ -15,6 +15,7 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.DataInfo;
import eu.dnetlib.dhp.schema.oaf.Relation;
@ -72,7 +73,7 @@ public class SparkPropagateRelation extends AbstractSparkAction {
.as(Encoders.bean(Relation.class));
Dataset<Tuple2<String, String>> mergedIds = mergeRels
.where(col("relClass").equalTo("merges"))
.where(col("relClass").equalTo(ModelConstants.MERGES))
.select(col("source"), col("target"))
.distinct()
.map(
@ -202,7 +203,7 @@ public class SparkPropagateRelation extends AbstractSparkAction {
}
/**
 * Tells whether either end of the relation refers to an identifier containing "dedup".
 *
 * @param r the relation to inspect; source and target are dereferenced without null checks
 * @return true when the lower-cased source or the lower-cased target contains the
 *         substring "dedup"
 */
private static boolean containsDedup(final Relation r) {
// Check written with the string literal.
return r.getSource().toLowerCase().contains("dedup")
|| r.getTarget().toLowerCase().contains("dedup");
// Same check written with ModelConstants.DEDUP ("dedup").
// NOTE(review): ModelConstants.DEDUP is otherwise used as a subRelType value; here it acts as an
// identifier marker, so the two meanings become coupled - confirm this reuse is intended.
// NOTE(review): toLowerCase() without an explicit Locale depends on the default locale.
return r.getSource().toLowerCase().contains(ModelConstants.DEDUP)
|| r.getTarget().toLowerCase().contains(ModelConstants.DEDUP);
}
}

View File

@ -59,6 +59,7 @@ import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
import eu.dnetlib.dhp.oa.graph.raw.common.AbstractMigrationApplication;
import eu.dnetlib.dhp.oa.graph.raw.common.MigrateAction;
import eu.dnetlib.dhp.oa.graph.raw.common.VerifyNsPrefixPredicate;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.Context;
import eu.dnetlib.dhp.schema.oaf.DataInfo;
import eu.dnetlib.dhp.schema.oaf.Dataset;
@ -85,9 +86,6 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
public static final String SOURCE_TYPE = "source_type";
public static final String TARGET_TYPE = "target_type";
private static final String ORG_ORG_RELTYPE = "organizationOrganization";
private static final String ORG_ORG_SUBRELTYPE = "dedup";
private final DbClient dbClient;
private final long lastUpdateTimestamp;
@ -649,8 +647,8 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
createOpenaireId(10, rs.getString("collectedfromid"), true), rs.getString("collectedfromname"));
final Relation r1 = new Relation();
r1.setRelType(ORG_ORG_RELTYPE);
r1.setSubRelType(ORG_ORG_SUBRELTYPE);
r1.setRelType(ModelConstants.ORG_ORG_RELTYPE);
r1.setSubRelType(ModelConstants.DEDUP);
r1.setRelClass(relClass);
r1.setSource(orgId1);
r1.setTarget(orgId2);

View File

@ -5,7 +5,6 @@ import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.*;
import java.util.*;
import java.util.function.Function;
import java.util.stream.Collectors;
import org.apache.commons.lang3.StringUtils;
@ -352,7 +351,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
res
.add(
getRelation(
otherId, docId, RESULT_RESULT, PART, HAS_PARTS, entity));
otherId, docId, RESULT_RESULT, PART, HAS_PART, entity));
} else {
// TODO catch more semantics
}

View File

@ -6,6 +6,7 @@ import java.time.LocalDateTime
import java.time.format.DateTimeFormatter
import eu.dnetlib.dhp.common.PacePerson
import eu.dnetlib.dhp.schema.action.AtomicAction
import eu.dnetlib.dhp.schema.common.ModelConstants
import eu.dnetlib.dhp.schema.oaf.{Author, Dataset, ExternalReference, Field, Instance, KeyValue, Oaf, Publication, Qualifier, Relation, Result, StructuredProperty}
import eu.dnetlib.dhp.schema.scholexplorer.{DLIDataset, DLIPublication}
import eu.dnetlib.dhp.utils.DHPUtils
@ -43,18 +44,18 @@ object DLIToOAF {
/** Maps an incoming relation type name to the pair (relClass, subRelType) assigned to the
  * converted relation: the first tuple element is passed to setRelClass, the second to
  * setSubRelType. Relation types missing from this map are not converted.
  */
val relationTypeMapping: Map[String, (String, String)] = Map(
// Entries written with string literals.
"IsReferencedBy" -> ("isRelatedTo", "relationship"),
"References" -> ("isRelatedTo", "relationship"),
"IsRelatedTo" -> ("isRelatedTo", "relationship"),
"IsSupplementedBy" -> ("isSupplementedBy", "supplement"),
"Documents"-> ("isRelatedTo", "relationship"),
"Cites" -> ("cites", "citation"),
"Unknown" -> ("isRelatedTo", "relationship"),
"IsSourceOf" -> ("isRelatedTo", "relationship"),
// NOTE(review): this literal relClass is "IsCitedBy" (capital I), while
// ModelConstants.IS_CITED_BY is "isCitedBy" - the two forms differ; confirm the expected casing.
"IsCitedBy" -> ("IsCitedBy", "citation"),
"Reviews" -> ("reviews", "review"),
"Describes" -> ("isRelatedTo", "relationship"),
"HasAssociationWith" -> ("isRelatedTo", "relationship")
// Same entries written with the shared ModelConstants definitions.
"IsReferencedBy" -> (ModelConstants.IS_RELATED_TO, ModelConstants.RELATIONSHIP),
"References" -> (ModelConstants.IS_RELATED_TO, ModelConstants.RELATIONSHIP),
"IsRelatedTo" -> (ModelConstants.IS_RELATED_TO, ModelConstants.RELATIONSHIP),
"IsSupplementedBy" -> (ModelConstants.IS_SUPPLEMENTED_BY, ModelConstants.SUPPLEMENT),
"Documents" -> (ModelConstants.IS_RELATED_TO, ModelConstants.RELATIONSHIP),
"Cites" -> (ModelConstants.CITES, ModelConstants.CITATION),
"Unknown" -> (ModelConstants.IS_RELATED_TO, ModelConstants.RELATIONSHIP),
"IsSourceOf" -> (ModelConstants.IS_RELATED_TO, ModelConstants.RELATIONSHIP),
"IsCitedBy" -> (ModelConstants.IS_CITED_BY, ModelConstants.CITATION),
"Reviews" -> (ModelConstants.REVIEWS, ModelConstants.REVIEW),
"Describes" -> (ModelConstants.IS_RELATED_TO, ModelConstants.RELATIONSHIP),
"HasAssociationWith" -> (ModelConstants.IS_RELATED_TO, ModelConstants.RELATIONSHIP)
)
val expectecdPidType = List("uniprot", "ena", "chembl", "ncbi-n", "ncbi-p", "genbank", "pdb", "url")
@ -279,7 +280,7 @@ object DLIToOAF {
val rt = r.getRelType
if (!relationTypeMapping.contains(rt))
return null
r.setRelType("resultResult")
r.setRelType(ModelConstants.RESULT_RESULT)
r.setRelClass(relationTypeMapping(rt)._1)
r.setSubRelType(relationTypeMapping(rt)._2)
r.setSource(generateId(r.getSource))
@ -316,7 +317,7 @@ object DLIToOAF {
if (d.getAuthor == null || d.getAuthor.isEmpty)
return null
result.setAuthor(d.getAuthor.asScala.map(convertAuthor).asJava)
result.setResulttype(createQualifier(d.getResulttype.getClassid, d.getResulttype.getClassname, "dnet:result_typologies", "dnet:result_typologies"))
result.setResulttype(createQualifier(d.getResulttype.getClassid, d.getResulttype.getClassname, ModelConstants.DNET_RESULT_TYPOLOGIES, ModelConstants.DNET_RESULT_TYPOLOGIES))
if (d.getSubject != null)
result.setSubject(d.getSubject.asScala.map(convertSubject).asJava)
@ -337,7 +338,7 @@ object DLIToOAF {
result.setDateofacceptance(asField(d.getRelevantdate.get(0).getValue))
result.setPublisher(d.getPublisher)
result.setSource(d.getSource)
result.setBestaccessright(createAccessRight("UNKNOWN", "not available", "dnet:access_modes", "dnet:access_modes"))
result.setBestaccessright(createAccessRight(ModelConstants.UNKNOWN, ModelConstants.NOT_AVAILABLE, ModelConstants.DNET_ACCESS_MODES, ModelConstants.DNET_ACCESS_MODES))
val instance_urls = if (fpids.head.length < 5) s"https://www.rcsb.org/structure/${fpids.head}" else s"https://dx.doi.org/${fpids.head}"
@ -367,13 +368,13 @@ object DLIToOAF {
val i = new Instance
i.setUrl(List(url).asJava)
if (dataset)
i.setInstancetype(createQualifier("0021", "Dataset", "dnet:publication_resource", "dnet:publication_resource"))
i.setInstancetype(createQualifier("0021", "Dataset", ModelConstants.DNET_PUBLICATION_RESOURCE, ModelConstants.DNET_PUBLICATION_RESOURCE))
else
i.setInstancetype(createQualifier("0000", "Unknown", "dnet:publication_resource", "dnet:publication_resource"))
i.setInstancetype(createQualifier("0000", "Unknown", ModelConstants.DNET_PUBLICATION_RESOURCE, ModelConstants.DNET_PUBLICATION_RESOURCE))
if (originalInstance != null && originalInstance.getHostedby != null)
i.setHostedby(originalInstance.getHostedby)
i.setAccessright(createAccessRight("UNKNOWN", "not available", "dnet:access_modes", "dnet:access_modes"))
i.setAccessright(createAccessRight(ModelConstants.UNKNOWN, ModelConstants.NOT_AVAILABLE, ModelConstants.DNET_ACCESS_MODES, ModelConstants.DNET_ACCESS_MODES))
i.setDateofacceptance(doa)
i
@ -383,19 +384,19 @@ object DLIToOAF {
/**
 * Replaces the qualifier of the given property and hands the same instance back.
 *
 * The property is modified in place; no copy is made.
 *
 * @param d the structured property whose qualifier is overwritten
 * @return the same `d` instance, carrying the new qualifier
 */
def patchRelevantDate(d: StructuredProperty): StructuredProperty = {
// Literal-based call and its ModelConstants-based counterpart; the later call is the one left in effect.
d.setQualifier(createQualifier("UNKNOWN", "dnet:dataCite_date"))
d.setQualifier(createQualifier(ModelConstants.UNKNOWN, ModelConstants.DNET_DATACITE_DATE))
d
}
/**
 * Replaces the qualifier of the given title property with a "main title" qualifier
 * and hands the same instance back.
 *
 * The property is modified in place; no copy is made.
 *
 * @param t the structured property whose qualifier is overwritten
 * @return the same `t` instance, carrying the new qualifier
 */
def patchTitle(t: StructuredProperty): StructuredProperty = {
// Literal-based call and its ModelConstants-based counterpart; the later call is the one left in effect.
t.setQualifier(createQualifier("main title", "dnet:dataCite_title"))
t.setQualifier(createQualifier("main title", ModelConstants.DNET_DATACITE_TITLE))
t
}
def convertSubject(s: StructuredProperty): StructuredProperty = {
s.setQualifier(createQualifier("keyword", "dnet:subject_classification_typologies"))
s.setQualifier(createQualifier("keyword", ModelConstants.DNET_SUBJECT_TYPOLOGIES))
s

View File

@ -8,6 +8,7 @@ import java.util.Optional;
import com.google.common.collect.ComparisonChain;
import com.google.common.collect.Maps;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.Relation;
public class RelationComparator implements Comparator<Relation> {
@ -15,18 +16,18 @@ public class RelationComparator implements Comparator<Relation> {
private static final Map<String, Integer> weights = Maps.newHashMap();
// Fills the weight table with one integer weight per relation sub-type key.
// NOTE(review): presumably looked up through the relation's subRelType in getWeight — confirm.
static {
weights.put("outcome", 0);
weights.put("supplement", 1);
weights.put("review", 2);
weights.put("citation", 3);
weights.put("affiliation", 4);
weights.put("relationship", 5);
weights.put("publicationDataset", 6);
weights.put("similarity", 7);
weights.put(ModelConstants.OUTCOME, 0);
weights.put(ModelConstants.SUPPLEMENT, 1);
// REVIEW ("review") is the sub-relation type; REVIEWS is a relation class and would
// register a key that differs from the "review" literal it stands in for.
weights.put(ModelConstants.REVIEW, 2);
weights.put(ModelConstants.CITATION, 3);
weights.put(ModelConstants.AFFILIATION, 4);
weights.put(ModelConstants.RELATIONSHIP, 5);
weights.put(ModelConstants.PUBLICATION_DATASET, 6);
weights.put(ModelConstants.SIMILARITY, 7);
weights.put("provision", 8);
weights.put("participation", 9);
weights.put("dedup", 10);
weights.put(ModelConstants.PROVISION, 8);
weights.put(ModelConstants.PARTICIPATION, 9);
weights.put(ModelConstants.DEDUP, 10);
}
private Integer getWeight(Relation o) {

View File

@ -9,6 +9,7 @@ import com.fasterxml.jackson.annotation.JsonIgnore;
import com.google.common.collect.ComparisonChain;
import com.google.common.collect.Maps;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.Relation;
public class SortableRelation extends Relation implements Comparable<SortableRelation>, Serializable {
@ -16,18 +17,18 @@ public class SortableRelation extends Relation implements Comparable<SortableRel
private static final Map<String, Integer> weights = Maps.newHashMap();
// Fills the weight table with one integer weight per relation sub-type key.
// NOTE(review): presumably looked up through the relation's subRelType when sorting — confirm.
static {
weights.put("outcome", 0);
weights.put("supplement", 1);
weights.put("review", 2);
weights.put("citation", 3);
weights.put("affiliation", 4);
weights.put("relationship", 5);
weights.put("publicationDataset", 6);
weights.put("similarity", 7);
weights.put(ModelConstants.OUTCOME, 0);
weights.put(ModelConstants.SUPPLEMENT, 1);
weights.put(ModelConstants.REVIEW, 2);
weights.put(ModelConstants.CITATION, 3);
weights.put(ModelConstants.AFFILIATION, 4);
weights.put(ModelConstants.RELATIONSHIP, 5);
// PUBLICATION_DATASET is the constant standing in for the "publicationDataset" key;
// PUBLICATION_RESULTTYPE_CLASSID names a result type class id, not a relation sub-type.
weights.put(ModelConstants.PUBLICATION_DATASET, 6);
weights.put(ModelConstants.SIMILARITY, 7);
weights.put("provision", 8);
weights.put("participation", 9);
weights.put("dedup", 10);
weights.put(ModelConstants.PROVISION, 8);
weights.put(ModelConstants.PARTICIPATION, 9);
weights.put(ModelConstants.DEDUP, 10);
}
private static final long serialVersionUID = 34753984579L;

View File

@ -9,6 +9,7 @@ import com.google.common.base.Objects;
import com.google.common.collect.ComparisonChain;
import com.google.common.collect.Maps;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.Relation;
public class SortableRelationKey implements Comparable<SortableRelationKey>, Serializable {
@ -16,18 +17,18 @@ public class SortableRelationKey implements Comparable<SortableRelationKey>, Ser
private static final Map<String, Integer> weights = Maps.newHashMap();
// Fills the weight table with one integer weight per relation sub-type key.
// The numbering used here (participation = 0 ... similarity = 10) differs from the one
// registered by RelationComparator and SortableRelation (outcome = 0 ... dedup = 10).
// Each string-literal key is paired with a ModelConstants-based put carrying the same weight.
static {
weights.put("participation", 0);
weights.put(ModelConstants.PARTICIPATION, 0);
weights.put("outcome", 1);
weights.put("affiliation", 2);
weights.put("dedup", 3);
weights.put("publicationDataset", 4);
weights.put("citation", 5);
weights.put("supplement", 6);
weights.put("review", 7);
weights.put("relationship", 8);
weights.put("provision", 9);
weights.put("similarity", 10);
weights.put(ModelConstants.OUTCOME, 1);
weights.put(ModelConstants.AFFILIATION, 2);
weights.put(ModelConstants.DEDUP, 3);
weights.put(ModelConstants.PUBLICATION_DATASET, 4);
weights.put(ModelConstants.CITATION, 5);
weights.put(ModelConstants.SUPPLEMENT, 6);
weights.put(ModelConstants.REVIEW, 7);
weights.put(ModelConstants.RELATIONSHIP, 8);
weights.put(ModelConstants.PROVISION, 9);
weights.put(ModelConstants.SIMILARITY, 10);
}
private static final long serialVersionUID = 3232323;

View File

@ -38,6 +38,7 @@ import com.mycila.xmltool.XMLTag;
import eu.dnetlib.dhp.oa.provision.model.*;
import eu.dnetlib.dhp.schema.common.EntityType;
import eu.dnetlib.dhp.schema.common.MainEntityType;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.schema.oaf.Result;
@ -1226,7 +1227,7 @@ public class XmlRecordFactory implements Serializable {
}
/**
 * Tells whether the relation carried by the given link has the dedup sub-relation type.
 * The comparison against the relation's subRelType ignores case.
 *
 * @param link wrapper whose relation is inspected
 * @return true when the relation's subRelType matches the dedup sub-relation type
 */
private boolean isDuplicate(RelatedEntityWrapper link) {
// Two variants of the same check: one against REL_SUBTYPE_DEDUP, one against ModelConstants.DEDUP.
// NOTE(review): presumably both constants hold the same value — confirm before dropping either line.
return REL_SUBTYPE_DEDUP.equalsIgnoreCase(link.getRelation().getSubRelType());
return ModelConstants.DEDUP.equalsIgnoreCase(link.getRelation().getSubRelType());
}
private List<String> listExtraInfo(OafEntity entity) {