diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java index 1a0afb981..6b57829cd 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java @@ -111,6 +111,13 @@ public class SparkBulkTagJob { .mode(SaveMode.Overwrite) .option("compression", "gzip") .json(outputPath); + + readPath(spark, outputPath, resultClazz) + .write() + .mode(SaveMode.Overwrite) + .option("compression","gzip") + .json(inputPath); + } public static Dataset readPath( diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/SparkResultToOrganizationFromIstRepoJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/SparkResultToOrganizationFromIstRepoJob.java index 0757ebccd..96493336e 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/SparkResultToOrganizationFromIstRepoJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/SparkResultToOrganizationFromIstRepoJob.java @@ -122,6 +122,12 @@ public class SparkResultToOrganizationFromIstRepoJob { .mode(SaveMode.Append) .option("compression", "gzip") .json(outputPath); + + readPath(spark, outputPath, Relation.class) + .write() + .mode(SaveMode.Overwrite) + .option("compression","gzip") + .json(inputPath.substring(0, inputPath.indexOf("/") + 1) + "relation"); } private static FlatMapFunction, Relation> createRelationFn() { diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/oozie_app/config-default.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/oozie_app/config-default.xml new file mode 100644 index 000000000..d262cb6e0 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/oozie_app/config-default.xml @@ -0,0 +1,30 @@ + + + jobTracker + yarnRM + + + nameNode + hdfs://nameservice1 + + + oozie.use.system.libpath + true + + + hiveMetastoreUris + thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 + + + hiveJdbcUrl + jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000 + + + hiveDbName + openaire + + + oozie.launcher.mapreduce.user.classpath.first + true + + diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/oozie_app/import.txt b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/oozie_app/import.txt new file mode 100644 index 000000000..9ecce9e12 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/oozie_app/import.txt @@ -0,0 +1,4 @@ +## This is a classpath-based import file (this header is required) +orcid_propagation classpath eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/oozie_app +dump_funder classpath eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/funder/oozie_app +dump_community classpath eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/community/oozie_app \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/oozie_app/workflow.xml new file mode 100644 index 000000000..bb1538ae0 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/oozie_app/workflow.xml @@ -0,0 +1,187 @@ + + + + + sourcePath + the source path + + + allowedsemrels + the semantic relationships allowed for propagation + + + outputPath + the output path + + + + isLookUpUrl + the isLookup service endpoint + + + pathMap + the json path associated to each selection field + + + + hiveDbName + the target hive database name + + + hiveJdbcUrl + hive server jdbc url + + + hiveMetastoreUris + hive server metastore URIs + + + sparkDriverMemory + memory for driver process + + + sparkExecutorMemory + memory for individual executor + + + sparkExecutorCores + number of cores used by single executor + + + oozieActionShareLibForSpark2 + oozie action sharelib for spark 2.* + + + spark2ExtraListeners + com.cloudera.spark.lineage.NavigatorAppListener + spark 2.* extra listeners classname + + + spark2SqlQueryExecutionListeners + com.cloudera.spark.lineage.NavigatorQueryListener + spark 2.* sql query execution listeners classname + + + spark2YarnHistoryServerAddress + spark 2.* yarn history server address + + + spark2EventLogDir + spark 2.* event log dir location + + + + + ${jobTracker} + ${nameNode} + + + mapreduce.job.queuename + ${queueName} + + + oozie.launcher.mapred.job.queue.name + ${oozieLauncherQueueName} + + + oozie.action.sharelib.for.spark + ${oozieActionShareLibForSpark2} + + + + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + ${wf:conf('onlyUpload') eq true} + + + + + + + + ${wf:appPath()}/orcid_propagation + + + + + sourcePath + ${sourcePath} + + + allowedsemrels + ${allowedsemrels} + + + outputPath + ${outputPath} + + + + + + + + + + ${wf:appPath()}/bulk_tagging + + + + + sourcePath + ${outputPath} + + + isLookUpUrl + ${isLookUpUrl} + + + pathMap + ${pathMap} + + + outputPath + ${workingDir}/results + + + + + + + + + + ${wf:appPath()}/bulk_tagging + + + + + sourcePath + ${outputPath} + + + outputPath + ${workingDir}/relations + + + blacklist + ${blacklist} + + + + + + + + + + + diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/input_bulkTag_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflow/bulktag/input_bulkTag_parameters.json similarity index 97% rename from dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/input_bulkTag_parameters.json rename to dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflow/bulktag/input_bulkTag_parameters.json index a37d7d168..31c3bf3c4 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/input_bulkTag_parameters.json +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflow/bulktag/input_bulkTag_parameters.json @@ -25,7 +25,7 @@ }, { "paramName": "out", - "paramLongName": "outputPath", + "paramLongName": "workingPath", "paramDescription": "the path used to store temporary output files", "paramRequired": true }, diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/oozie_app/config-default.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflow/bulktag/oozie_app/config-default.xml similarity index 100% rename from dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/oozie_app/config-default.xml rename to dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflow/bulktag/oozie_app/config-default.xml diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflow/bulktag/oozie_app/workflow.xml similarity index 76% rename from dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/oozie_app/workflow.xml rename to dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflow/bulktag/oozie_app/workflow.xml index f019f8413..25d288ea9 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflow/bulktag/oozie_app/workflow.xml @@ -13,7 +13,7 @@ the json path associated to each selection field - outputPath + workingPath the output path @@ -29,65 +29,12 @@ - + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - - - - - - - - - - - - - - - - ${nameNode}/${sourcePath}/relation - ${nameNode}/${outputPath}/relation - - - - - - - - ${nameNode}/${sourcePath}/organization - ${nameNode}/${outputPath}/organization - - - - - - - - ${nameNode}/${sourcePath}/project - ${nameNode}/${outputPath}/project - - - - - - - - ${nameNode}/${sourcePath}/datasource - ${nameNode}/${outputPath}/datasource - - - - - - @@ -115,7 +62,7 @@ --sourcePath${sourcePath}/publication --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication - --outputPath${outputPath}/publication + --workingPath${workingPath}/publication --pathMap${pathMap} --isLookUpUrl${isLookUpUrl} @@ -142,7 +89,7 @@ --sourcePath${sourcePath}/dataset --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset - --outputPath${outputPath}/dataset + --workingPath${workingPath}/dataset --pathMap${pathMap} --isLookUpUrl${isLookUpUrl} @@ -169,7 +116,7 @@ --sourcePath${sourcePath}/otherresearchproduct --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct - --outputPath${outputPath}/otherresearchproduct + --workingPath${workingPath}/otherresearchproduct --pathMap${pathMap} --isLookUpUrl${isLookUpUrl} @@ -196,7 +143,7 @@ --sourcePath${sourcePath}/software --resultTableNameeu.dnetlib.dhp.schema.oaf.Software - --outputPath${outputPath}/software + --workingPath${workingPath}/software --pathMap${pathMap} --isLookUpUrl${isLookUpUrl} diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/input_orcidtoresult_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflow/orcidtoresultfromsemrel/input_orcidtoresult_parameters.json similarity index 100% rename from dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/input_orcidtoresult_parameters.json rename to dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflow/orcidtoresultfromsemrel/input_orcidtoresult_parameters.json diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/input_prepareorcidtoresult_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflow/orcidtoresultfromsemrel/input_prepareorcidtoresult_parameters.json similarity index 100% rename from dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/input_prepareorcidtoresult_parameters.json rename to dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflow/orcidtoresultfromsemrel/input_prepareorcidtoresult_parameters.json diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/input_prepareorcidtoresult_parameters2.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflow/orcidtoresultfromsemrel/input_prepareorcidtoresult_parameters2.json similarity index 100% rename from dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/input_prepareorcidtoresult_parameters2.json rename to dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflow/orcidtoresultfromsemrel/input_prepareorcidtoresult_parameters2.json diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/oozie_app/config-default.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflow/orcidtoresultfromsemrel/oozie_app/config-default.xml similarity index 100% rename from dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/oozie_app/config-default.xml rename to dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflow/orcidtoresultfromsemrel/oozie_app/config-default.xml diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflow/orcidtoresultfromsemrel/oozie_app/workflow.xml similarity index 91% rename from dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/oozie_app/workflow.xml rename to dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflow/orcidtoresultfromsemrel/oozie_app/workflow.xml index 5f52c1658..f957f34ad 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflow/orcidtoresultfromsemrel/oozie_app/workflow.xml @@ -36,49 +36,7 @@ - - - ${jobTracker} - ${nameNode} - ${nameNode}/${sourcePath}/relation - ${nameNode}/${outputPath}/relation - - - - - - - ${jobTracker} - ${nameNode} - ${nameNode}/${sourcePath}/organization - ${nameNode}/${outputPath}/organization - - - - - - - - ${jobTracker} - ${nameNode} - ${nameNode}/${sourcePath}/project - ${nameNode}/${outputPath}/project - - - - - - - - ${jobTracker} - ${nameNode} - ${nameNode}/${sourcePath}/datasource - ${nameNode}/${outputPath}/datasource - - - - diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/input_prepareresultorg_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflow/resulttoorganizationfrominstrepo/input_prepareresultorg_parameters.json similarity index 100% rename from dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/input_prepareresultorg_parameters.json rename to dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflow/resulttoorganizationfrominstrepo/input_prepareresultorg_parameters.json diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/input_propagationresulaffiliationfrominstrepo_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflow/resulttoorganizationfrominstrepo/input_propagationresulaffiliationfrominstrepo_parameters.json similarity index 100% rename from dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/input_propagationresulaffiliationfrominstrepo_parameters.json rename to dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflow/resulttoorganizationfrominstrepo/input_propagationresulaffiliationfrominstrepo_parameters.json diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/oozie_app/config-default.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflow/resulttoorganizationfrominstrepo/oozie_app/config-default.xml similarity index 100% rename from dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/oozie_app/config-default.xml rename to dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflow/resulttoorganizationfrominstrepo/oozie_app/config-default.xml diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflow/resulttoorganizationfrominstrepo/oozie_app/workflow.xml similarity index 75% rename from dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/oozie_app/workflow.xml rename to dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflow/resulttoorganizationfrominstrepo/oozie_app/workflow.xml index edfff8817..511a5d3b3 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflow/resulttoorganizationfrominstrepo/oozie_app/workflow.xml @@ -21,105 +21,12 @@ - + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - - - - - - - - - - - - - - - - - - - - ${nameNode}/${sourcePath}/relation - ${nameNode}/${outputPath}/relation - - - - - - - - ${nameNode}/${sourcePath}/publication - ${nameNode}/${outputPath}/publication - - - - - - - - ${nameNode}/${sourcePath}/dataset - ${nameNode}/${outputPath}/dataset - - - - - - - - ${nameNode}/${sourcePath}/otherresearchproduct - ${nameNode}/${outputPath}/otherresearchproduct - - - - - - - - ${nameNode}/${sourcePath}/software - ${nameNode}/${outputPath}/software - - - - - - - - ${nameNode}/${sourcePath}/organization - ${nameNode}/${outputPath}/organization - - - - - - - - ${nameNode}/${sourcePath}/project - ${nameNode}/${outputPath}/project - - - - - - - - ${nameNode}/${sourcePath}/datasource - ${nameNode}/${outputPath}/datasource - - - - - -