From 869f5762738fc029f7674d27be554a92cee96183 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 29 Apr 2020 18:14:52 +0200 Subject: [PATCH] added hash map for relationship entityType id prefix, and relation inverse --- .../dhp/schema/common/ModelSupport.java | 166 +++++++++++++ .../dhp/schema/common/RelationInverse.java | 44 +++- .../blacklist/oozie_app/config-default.xml | 54 +++++ .../dhp/blacklist/oozie_app/workflow.xml | 227 ++++++++++++++++++ 4 files changed, 490 insertions(+), 1 deletion(-) create mode 100644 dhp-workflows/dhp-blacklist/src/main/resources/eu/dnetlib/dhp/blacklist/oozie_app/config-default.xml create mode 100644 dhp-workflows/dhp-blacklist/src/main/resources/eu/dnetlib/dhp/blacklist/oozie_app/workflow.xml diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelSupport.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelSupport.java index 0054e6d6f..8587dfca5 100644 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelSupport.java +++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelSupport.java @@ -50,6 +50,172 @@ public class ModelSupport { oafTypes.put("relation", Relation.class); } + public static final Map entityIdPrefix = Maps.newHashMap(); + + static { + entityIdPrefix.put("datasource", "10"); + entityIdPrefix.put("organization", "20"); + entityIdPrefix.put("project", "40"); + entityIdPrefix.put("result","50"); + } + + public static final Map relationInverseMap = Maps.newHashMap(); + + static { + relationInverseMap.put("personResult_authorship_isAuthorOf", new RelationInverse() + .setRelation("isAuthorOf") + .setInverse("hasAuthor") + .setRelType("personResult") + .setSubReltype("authorship")); + relationInverseMap.put("personResult_authorship_hasAuthor", new RelationInverse() + .setInverse("isAuthorOf") + .setRelation("hasAuthor") + .setRelType("personResult") + .setSubReltype("authorship")); + relationInverseMap.put("projectOrganization_participation_isParticipant", new RelationInverse() + .setRelation("isParticipant") + .setInverse("hasParticipant") + .setRelType("projectOrganization") + .setSubReltype("participation")); + relationInverseMap.put("projectOrganization_participation_hasParticipant", new RelationInverse() + .setInverse("isParticipant") + .setRelation("hasParticipant") + .setRelType("projectOrganization") + .setSubReltype("participation")); + relationInverseMap.put("resultOrganization_affiliation_hasAuthorInstitution", new RelationInverse() + .setRelation("hasAuthorInstitution") + .setInverse("isAuthorInstitutionOf") + .setRelType("resultOrganization") + .setSubReltype("affiliation")); + relationInverseMap.put("resultOrganization_affiliation_isAuthorInstitutionOf", new RelationInverse() + .setInverse("hasAuthorInstitution") + .setRelation("isAuthorInstitutionOf") + .setRelType("resultOrganization") + .setSubReltype("affiliation")); + relationInverseMap.put("organizationOrganization_dedup_merges", new RelationInverse() + .setRelation("merges") + .setInverse("isMergedIn") + .setRelType("organizationOrganization") + .setSubReltype("dedup")); + relationInverseMap.put("organizationOrganization_dedup_isMergedIn", new RelationInverse() + .setInverse("merges") + .setRelation("isMergedIn") + .setRelType("organizationOrganization") + .setSubReltype("dedup")); + relationInverseMap.put("organizationOrganization_dedupSimilarity_isSimilarTo", new RelationInverse() + .setInverse("isSimilarTo") + .setRelation("isSimilarTo") + .setRelType("organizationOrganization") + .setSubReltype("dedupSimilarity")); + + relationInverseMap.put("resultProject_outcome_isProducedBy", new RelationInverse() + .setRelation("isProducedBy") + .setInverse("produces") + .setRelType("resultProject") + .setSubReltype("outcome")); + relationInverseMap.put("resultProject_outcome_produces", new RelationInverse() + .setInverse("isProducedBy") + .setRelation("produces") + .setRelType("resultProject") + .setSubReltype("outcome")); + relationInverseMap.put("projectPerson_contactPerson_isContact", new RelationInverse() + .setRelation("isContact") + .setInverse("hasContact") + .setRelType("projectPerson") + .setSubReltype("contactPerson")); + relationInverseMap.put("projectPerson_contactPerson_hasContact", new RelationInverse() + .setInverse("isContact") + .setRelation("hasContact") + .setRelType("personPerson") + .setSubReltype("coAuthorship")); + relationInverseMap.put("personPerson_coAuthorship_isCoauthorOf", new RelationInverse() + .setInverse("isCoAuthorOf") + .setRelation("isCoAuthorOf") + .setRelType("personPerson") + .setSubReltype("coAuthorship")); + relationInverseMap.put("personPerson_dedup_merges", new RelationInverse() + .setInverse("isMergedIn") + .setRelation("merges") + .setRelType("personPerson") + .setSubReltype("dedup")); + relationInverseMap.put("personPerson_dedup_isMergedIn", new RelationInverse() + .setInverse("merges") + .setRelation("isMergedIn") + .setRelType("personPerson") + .setSubReltype("dedup")); + relationInverseMap.put("personPerson_dedupSimilarity_isSimilarTo", new RelationInverse() + .setInverse("isSimilarTo") + .setRelation("isSimilarTo") + .setRelType("personPerson") + .setSubReltype("dedupSimilarity")); + relationInverseMap.put("datasourceOrganization_provision_isProvidedBy", new RelationInverse() + .setInverse("provides") + .setRelation("isProvidedBy") + .setRelType("datasourceOrganization") + .setSubReltype("provision")); + relationInverseMap.put("datasourceOrganization_provision_provides", new RelationInverse() + .setInverse("isProvidedBy") + .setRelation("provides") + .setRelType("datasourceOrganization") + .setSubReltype("provision")); + relationInverseMap.put("resultResult_similarity_hasAmongTopNSimilarDocuments", new RelationInverse() + .setInverse("isAmongTopNSimilarDocuments") + .setRelation("hasAmongTopNSimilarDocuments") + .setRelType("resultResult") + .setSubReltype("similarity")); + relationInverseMap.put("resultResult_similarity_isAmongTopNSimilarDocuments", new RelationInverse() + .setInverse("hasAmongTopNSimilarDocuments") + .setRelation("isAmongTopNSimilarDocuments") + .setRelType("resultResult") + .setSubReltype("similarity")); + relationInverseMap.put("resultResult_relationship_isRelatedTo", new RelationInverse() + .setInverse("isRelatedTo") + .setRelation("isRelatedTo") + .setRelType("resultResult") + .setSubReltype("relationship")); + relationInverseMap.put("resultResult_similarity_isAmongTopNSimilarDocuments", new RelationInverse() + .setInverse("hasAmongTopNSimilarDocuments") + .setRelation("isAmongTopNSimilarDocuments") + .setRelType("resultResult") + .setSubReltype("similarity")); + relationInverseMap.put("resultResult_supplement_isSupplementTo", new RelationInverse() + .setInverse("isSupplementedBy") + .setRelation("isSupplementTo") + .setRelType("resultResult") + .setSubReltype("supplement")); + relationInverseMap.put("resultResult_supplement_isSupplementedBy", new RelationInverse() + .setInverse("isSupplementTo") + .setRelation("isSupplementedBy") + .setRelType("resultResult") + .setSubReltype("supplement")); + relationInverseMap.put("resultResult_part_isPartOf", new RelationInverse() + .setInverse("hasPart") + .setRelation("isPartOf") + .setRelType("resultResult") + .setSubReltype("part")); + relationInverseMap.put("resultResult_part_hasPart", new RelationInverse() + .setInverse("isPartOf") + .setRelation("hasPart") + .setRelType("resultResult") + .setSubReltype("part")); + relationInverseMap.put("resultResult_dedup_merges", new RelationInverse() + .setInverse("isMergedIn") + .setRelation("merges") + .setRelType("resultResult") + .setSubReltype("dedup")); + relationInverseMap.put("resultResult_dedup_isMergedIn", new RelationInverse() + .setInverse("merges") + .setRelation("isMergedIn") + .setRelType("resultResult") + .setSubReltype("dedup")); + relationInverseMap.put("resultResult_dedupSimilarity_isSimilarTo", new RelationInverse() + .setInverse("isSimilarTo") + .setRelation("isSimilarTo") + .setRelType("resultResult") + .setSubReltype("dedupSimilarity")); + + } + private static final String schemeTemplate = "dnet:%s_%s_relations"; private ModelSupport() {} diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/RelationInverse.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/RelationInverse.java index b726a8c16..d61dce689 100644 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/RelationInverse.java +++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/RelationInverse.java @@ -1,4 +1,46 @@ package eu.dnetlib.dhp.schema.common; -public class RelationInverse { +public class RelationInverse { + private String relation; + private String inverse; + private String relType; + private String subReltype; + + public String getRelType() { + return relType; + } + + public RelationInverse setRelType(String relType) { + this.relType = relType; + return this; + } + + public String getSubReltype() { + return subReltype; + } + + public RelationInverse setSubReltype(String subReltype) { + this.subReltype = subReltype; + return this; + } + + public String getRelation() { + return relation; + } + + public RelationInverse setRelation(String relation) { + this.relation = relation; + return this; + } + + public String getInverse() { + return inverse; + } + + public RelationInverse setInverse(String inverse) { + this.inverse = inverse; + return this; + } + + } diff --git a/dhp-workflows/dhp-blacklist/src/main/resources/eu/dnetlib/dhp/blacklist/oozie_app/config-default.xml b/dhp-workflows/dhp-blacklist/src/main/resources/eu/dnetlib/dhp/blacklist/oozie_app/config-default.xml new file mode 100644 index 000000000..fe82ae194 --- /dev/null +++ b/dhp-workflows/dhp-blacklist/src/main/resources/eu/dnetlib/dhp/blacklist/oozie_app/config-default.xml @@ -0,0 +1,54 @@ + + + jobTracker + yarnRM + + + nameNode + hdfs://nameservice1 + + + oozie.use.system.libpath + true + + + oozie.action.sharelib.for.spark + spark2 + + + hive_metastore_uris + thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 + + + spark2YarnHistoryServerAddress + http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089 + + + spark2ExtraListeners + com.cloudera.spark.lineage.NavigatorAppListener + + + spark2SqlQueryExecutionListeners + com.cloudera.spark.lineage.NavigatorQueryListener + + + sparkExecutorNumber + 4 + + + spark2EventLogDir + /user/spark/spark2ApplicationHistory + + + sparkDriverMemory + 15G + + + sparkExecutorMemory + 6G + + + sparkExecutorCores + 1 + + \ No newline at end of file diff --git a/dhp-workflows/dhp-blacklist/src/main/resources/eu/dnetlib/dhp/blacklist/oozie_app/workflow.xml b/dhp-workflows/dhp-blacklist/src/main/resources/eu/dnetlib/dhp/blacklist/oozie_app/workflow.xml new file mode 100644 index 000000000..91be13210 --- /dev/null +++ b/dhp-workflows/dhp-blacklist/src/main/resources/eu/dnetlib/dhp/blacklist/oozie_app/workflow.xml @@ -0,0 +1,227 @@ + + + + sourcePath + the source path + + + sparkDriverMemory + memory for driver process + + + sparkExecutorMemory + memory for individual executor + + + sparkExecutorCores + number of cores used by single executor + + + isLookUpUrl + the isLookup service endpoint + + + pathMap + the json path associated to each selection field + + + outputPath + the output path + + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + + + + + + + + + + + + + + + + + + + + + + + ${jobTracker} + ${nameNode} + ${nameNode}/${sourcePath}/relation + ${nameNode}/${outputPath}/relation + + + + + + + + ${jobTracker} + ${nameNode} + ${nameNode}/${sourcePath}/organization + ${nameNode}/${outputPath}/organization + + + + + + + + ${jobTracker} + ${nameNode} + ${nameNode}/${sourcePath}/project + ${nameNode}/${outputPath}/project + + + + + + + + ${jobTracker} + ${nameNode} + ${nameNode}/${sourcePath}/datasource + ${nameNode}/${outputPath}/datasource + + + + + + + + + + + + + + + + ${jobTracker} + ${nameNode} + yarn-cluster + cluster + bulkTagging-publication + eu.dnetlib.dhp.bulktag.SparkBulkTagJob2 + dhp-bulktag-${projectVersion}.jar + + --num-executors=${sparkExecutorNumber} + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + + --sourcePath${sourcePath}/publication + --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication + --outputPath${outputPath}/publication + --pathMap${pathMap} + --isLookUpUrl${isLookUpUrl} + + + + + + + ${jobTracker} + ${nameNode} + yarn-cluster + cluster + bulkTagging-dataset + eu.dnetlib.dhp.bulktag.SparkBulkTagJob2 + dhp-bulktag-${projectVersion}.jar + + --num-executors=${sparkExecutorNumber} + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + + --sourcePath${sourcePath}/dataset + --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset + --outputPath${outputPath}/dataset + --pathMap${pathMap} + --isLookUpUrl${isLookUpUrl} + + + + + + + ${jobTracker} + ${nameNode} + yarn-cluster + cluster + bulkTagging-orp + eu.dnetlib.dhp.bulktag.SparkBulkTagJob2 + dhp-bulktag-${projectVersion}.jar + + --num-executors=${sparkExecutorNumber} + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + + --sourcePath${sourcePath}/otherresearchproduct + --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct + --outputPath${outputPath}/otherresearchproduct + --pathMap${pathMap} + --isLookUpUrl${isLookUpUrl} + + + + + + + ${jobTracker} + ${nameNode} + yarn-cluster + cluster + bulkTagging-software + eu.dnetlib.dhp.bulktag.SparkBulkTagJob2 + dhp-bulktag-${projectVersion}.jar + + --num-executors=${sparkExecutorNumber} + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + + --sourcePath${sourcePath}/software + --resultTableNameeu.dnetlib.dhp.schema.oaf.Software + --outputPath${outputPath}/software + --pathMap${pathMap} + --isLookUpUrl${isLookUpUrl} + + + + + + + \ No newline at end of file