From ce8b1d0bc3efa1e72f46bb34afeb89ddffd37e82 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 30 Apr 2020 14:38:54 +0200 Subject: [PATCH] new workflow definition to be inserted in the provision pipeline --- .../dhp/bulktag/oozie_app/workflow.xml | 94 +++++++++++++------ .../countrypropagation/oozie_app/workflow.xml | 88 +++++++++++------ .../oozie_app/workflow.xml | 76 ++++++++++++--- .../projecttoresult/oozie_app/workflow.xml | 83 ++++++++++------ ...t_preparecommunitytoresult_parameters.json | 6 -- .../oozie_app/workflow.xml | 76 ++++++++++++--- .../oozie_app/workflow.xml | 19 ---- 7 files changed, 302 insertions(+), 140 deletions(-) diff --git a/dhp-workflows/dhp-bulktag/src/main/resources/eu/dnetlib/dhp/bulktag/oozie_app/workflow.xml b/dhp-workflows/dhp-bulktag/src/main/resources/eu/dnetlib/dhp/bulktag/oozie_app/workflow.xml index 02efeb7ae..4f3d050b3 100644 --- a/dhp-workflows/dhp-bulktag/src/main/resources/eu/dnetlib/dhp/bulktag/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-bulktag/src/main/resources/eu/dnetlib/dhp/bulktag/oozie_app/workflow.xml @@ -4,18 +4,6 @@ sourcePath the source path - - sparkDriverMemory - memory for driver process - - - sparkExecutorMemory - memory for individual executor - - - sparkExecutorCores - number of cores used by single executor - isLookupUrl the isLookup service endpoint @@ -24,6 +12,10 @@ protoMap the json path associated to each selection field + + outputPath + the output path + @@ -34,27 +26,73 @@ - - - - - + + + + + + + + - + + + + + + + + ${jobTracker} ${nameNode} ${nameNode}/${sourcePath}/relation - ${nameNode}/${workingDir}/relation + ${nameNode}/${outputPath}/relation - + + + + ${jobTracker} + ${nameNode} + ${nameNode}/${sourcePath}/organization + ${nameNode}/${outputPath}/organization + + + + + + + ${jobTracker} + ${nameNode} + ${nameNode}/${sourcePath}/project + ${nameNode}/${outputPath}/project + + + + + + + + ${jobTracker} + ${nameNode} + ${nameNode}/${sourcePath}/datasource + ${nameNode}/${outputPath}/datasource + + + + + + + + + @@ -81,11 +119,11 @@ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --sourcePath${sourcePath}/publication - --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication - --outputPath${workingDir}/publication + --outputPath${outputPath}/publication --protoMap${protoMap} --isLookupUrl${isLookupUrl} + --saveGraph${saveGraph} @@ -110,11 +148,11 @@ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --sourcePath${sourcePath}/dataset - --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset - --outputPath${workingDir}/dataset + --outputPath${outputPath}/dataset --protoMap${protoMap} --isLookupUrl${isLookupUrl} + --saveGraph${saveGraph} @@ -139,11 +177,11 @@ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --sourcePath${sourcePath}/otherresearchproduct - --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct - --outputPath${workingDir}/otherresearchproduct + --outputPath${outputPath}/otherresearchproduct --protoMap${protoMap} --isLookupUrl${isLookupUrl} + --saveGraph${saveGraph} @@ -168,11 +206,11 @@ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --sourcePath${sourcePath}/software - --resultTableNameeu.dnetlib.dhp.schema.oaf.Software - --outputPath${workingDir}/software + --outputPath${outputPath}/software --protoMap${protoMap} --isLookupUrl${isLookupUrl} + --saveGraph${saveGraph} diff --git a/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/countrypropagation/oozie_app/workflow.xml b/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/countrypropagation/oozie_app/workflow.xml index bd2473308..d5fb199cd 100644 --- a/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/countrypropagation/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/countrypropagation/oozie_app/workflow.xml @@ -13,24 +13,8 @@ the allowed types - sparkDriverMemory - memory for driver process - - - sparkExecutorMemory - memory for individual executor - - - sparkExecutorCores - number of cores used by single executor - - - sparkExecutorNumber - number of executors used - - - saveGraph - writes new version of the graph after the propagation step + outputPath + the output path @@ -43,26 +27,70 @@ - - - - - + + + + + + + + - + + + + + + + ${jobTracker} ${nameNode} ${nameNode}/${sourcePath}/relation - ${nameNode}/${workingDir}/relation + ${nameNode}/${outputPath}/relation - + + + + + ${jobTracker} + ${nameNode} + ${nameNode}/${sourcePath}/organization + ${nameNode}/${outputPath}/organization + + + + + + + ${jobTracker} + ${nameNode} + ${nameNode}/${sourcePath}/project + ${nameNode}/${outputPath}/project + + + + + + + + ${jobTracker} + ${nameNode} + ${nameNode}/${sourcePath}/datasource + ${nameNode}/${outputPath}/datasource + + + + + + + yarn @@ -117,7 +145,7 @@ --hive_metastore_uris${hive_metastore_uris} --saveGraph${saveGraph} --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication - --outputPath${workingDir}/publication + --outputPath${outputPath}/publication --preparedInfoPath${workingDir}/preparedInfo @@ -146,7 +174,7 @@ --hive_metastore_uris${hive_metastore_uris} --saveGraph${saveGraph} --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset - --outputPath${workingDir}/dataset + --outputPath${outputPath}/dataset --preparedInfoPath${workingDir}/preparedInfo @@ -175,7 +203,7 @@ --hive_metastore_uris${hive_metastore_uris} --saveGraph${saveGraph} --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct - --outputPath${workingDir}/otherresearchproduct + --outputPath${outputPath}/otherresearchproduct --preparedInfoPath${workingDir}/preparedInfo @@ -204,7 +232,7 @@ --hive_metastore_uris${hive_metastore_uris} --saveGraph${saveGraph} --resultTableNameeu.dnetlib.dhp.schema.oaf.Software - --outputPath${workingDir}/software + --outputPath${outputPath}/software --preparedInfoPath${workingDir}/preparedInfo diff --git a/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/oozie_app/workflow.xml b/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/oozie_app/workflow.xml index e26c8f28a..ac25b6728 100644 --- a/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/oozie_app/workflow.xml @@ -9,12 +9,8 @@ the semantic relationships allowed for propagation - writeUpdate - writes the information found for the update. No double check done if the information is already present - - - saveGraph - writes new version of the graph after the propagation step + outputPath + the output path @@ -27,24 +23,72 @@ - + + + + + + + + - + + + + + + + + ${jobTracker} ${nameNode} ${nameNode}/${sourcePath}/relation - ${nameNode}/${workingDir}/orcid_propagation/relation + ${nameNode}/${outputPath}/relation - + + + + ${jobTracker} + ${nameNode} + ${nameNode}/${sourcePath}/organization + ${nameNode}/${outputPath}/organization + + + + + + + ${jobTracker} + ${nameNode} + ${nameNode}/${sourcePath}/project + ${nameNode}/${outputPath}/project + + + + + + + + ${jobTracker} + ${nameNode} + ${nameNode}/${sourcePath}/datasource + ${nameNode}/${outputPath}/datasource + + + + + + + @@ -222,7 +266,8 @@ --sourcePath${sourcePath}/publication --hive_metastore_uris${hive_metastore_uris} --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication - --outputPath${workingDir}/orcid_propagation/publication + --outputPath${outputPath}/publication + --saveGraph${saveGraph} @@ -249,7 +294,8 @@ --sourcePath${sourcePath}/dataset --hive_metastore_uris${hive_metastore_uris} --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset - --outputPath${workingDir}/orcid_propagation/dataset + --outputPath${outputPath}/dataset + --saveGraph${saveGraph} @@ -276,7 +322,8 @@ --sourcePath${sourcePath}/otherresearchproduct --hive_metastore_uris${hive_metastore_uris} --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct - --outputPath${workingDir}/orcid_propagation/otherresearchproduct + --outputPath${outputPath}/otherresearchproduct + --saveGraph${saveGraph} @@ -303,7 +350,8 @@ --sourcePath${sourcePath}/software --hive_metastore_uris${hive_metastore_uris} --resultTableNameeu.dnetlib.dhp.schema.oaf.Software - --outputPath${workingDir}/orcid_propagation/software + --outputPath${outputPath}/software + --saveGraph${saveGraph} diff --git a/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/projecttoresult/oozie_app/workflow.xml b/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/projecttoresult/oozie_app/workflow.xml index e4f791dbc..17bf9adf2 100644 --- a/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/projecttoresult/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/projecttoresult/oozie_app/workflow.xml @@ -8,22 +8,10 @@ allowedsemrels the allowed semantics - - sparkDriverMemory - memory for driver process - - - sparkExecutorMemory - memory for individual executor - - - sparkExecutorCores - number of cores used by single executor - - - saveGraph - writes new version of the graph after the propagation step - + + outputPath + the output path + @@ -35,21 +23,27 @@ - - - - - + + + + + + + + - + - + + + + @@ -57,7 +51,7 @@ ${jobTracker} ${nameNode} ${nameNode}/${sourcePath}/relation - ${nameNode}/${workingDir}/relation + ${nameNode}/${outputPath}/relation @@ -68,7 +62,7 @@ ${jobTracker} ${nameNode} ${nameNode}/${sourcePath}/publication - ${nameNode}/${workingDir}/publication + ${nameNode}/${outputPath}/publication @@ -79,7 +73,7 @@ ${jobTracker} ${nameNode} ${nameNode}/${sourcePath}/dataset - ${nameNode}/${workingDir}/dataset + ${nameNode}/${outputPath}/dataset @@ -90,7 +84,7 @@ ${jobTracker} ${nameNode} ${nameNode}/${sourcePath}/otherresearchproduct - ${nameNode}/${workingDir}/otherresearchproduct + ${nameNode}/${outputPath}/otherresearchproduct @@ -101,11 +95,42 @@ ${jobTracker} ${nameNode} ${nameNode}/${sourcePath}/software - ${nameNode}/${workingDir}/software + ${nameNode}/${outputPath}/software + + + ${jobTracker} + ${nameNode} + ${nameNode}/${sourcePath}/organization + ${nameNode}/${outputPath}/organization + + + + + + + ${jobTracker} + ${nameNode} + ${nameNode}/${sourcePath}/project + ${nameNode}/${outputPath}/project + + + + + + + + ${jobTracker} + ${nameNode} + ${nameNode}/${sourcePath}/datasource + ${nameNode}/${outputPath}/datasource + + + + @@ -156,7 +181,7 @@ --saveGraph${saveGraph} --hive_metastore_uris${hive_metastore_uris} - --outputPath${workingDir}/relation + --outputPath${outputPath}/relation --potentialUpdatePath${workingDir}/preparedInfo/potentialUpdates --alreadyLinkedPath${workingDir}/preparedInfo/alreadyLinked diff --git a/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/input_preparecommunitytoresult_parameters.json b/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/input_preparecommunitytoresult_parameters.json index de472417d..8df509abf 100644 --- a/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/input_preparecommunitytoresult_parameters.json +++ b/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/input_preparecommunitytoresult_parameters.json @@ -23,12 +23,6 @@ "paramDescription": "true if the spark session is managed, false otherwise", "paramRequired": false }, - { - "paramName":"test", - "paramLongName":"isTest", - "paramDescription": "true if it is executing a test", - "paramRequired": false - }, { "paramName": "out", "paramLongName": "outputPath", diff --git a/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/oozie_app/workflow.xml b/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/oozie_app/workflow.xml index 20ce6ddda..bf200e242 100644 --- a/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/oozie_app/workflow.xml @@ -9,12 +9,8 @@ organization community map - writeUpdate - writes the information found for the update. No double check done if the information is already present - - - saveGraph - writes new version of the graph after the propagation step + outputPath + the output path @@ -26,23 +22,71 @@ - + + + + + + + + + - + + + + + + + + ${jobTracker} ${nameNode} ${nameNode}/${sourcePath}/relation - ${nameNode}/${workingDir}/projecttoresult_propagation/relation + ${nameNode}/${outputPath}/relation - + + + + ${jobTracker} + ${nameNode} + ${nameNode}/${sourcePath}/organization + ${nameNode}/${outputPath}/organization + + + + + + + ${jobTracker} + ${nameNode} + ${nameNode}/${sourcePath}/project + ${nameNode}/${outputPath}/project + + + + + + + + ${jobTracker} + ${nameNode} + ${nameNode}/${sourcePath}/datasource + ${nameNode}/${outputPath}/datasource + + + + + + yarn @@ -100,7 +144,8 @@ --sourcePath${sourcePath}/publication --hive_metastore_uris${hive_metastore_uris} --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication - --outputPath${workingDir}/publication + --outputPath${outputPath}/publication + --saveGraph${saveGraph} @@ -127,7 +172,8 @@ --sourcePath${sourcePath}/dataset --hive_metastore_uris${hive_metastore_uris} --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset - --outputPath${workingDir}/dataset + --outputPath${outputPath}/dataset + --saveGraph${saveGraph} @@ -154,7 +200,8 @@ --sourcePath${sourcePath}/otherresearchproduct --hive_metastore_uris${hive_metastore_uris} --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct - --outputPath${workingDir}/otherresearchproduct + --outputPath${outputPath}/otherresearchproduct + --saveGraph${saveGraph} @@ -181,7 +228,8 @@ --sourcePath${sourcePath}/software --hive_metastore_uris${hive_metastore_uris} --resultTableNameeu.dnetlib.dhp.schema.oaf.Software - --outputPath${workingDir}/software + --outputPath${outputPath}/software + --saveGraph${saveGraph} diff --git a/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/oozie_app/workflow.xml b/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/oozie_app/workflow.xml index f1495e03b..7e124f843 100644 --- a/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/oozie_app/workflow.xml @@ -275,25 +275,6 @@ --outputPath${outputPath}/relation --datasourceOrganizationPath${workingDir}/preparedInfo/datasourceOrganization --alreadyLinkedPath${workingDir}/preparedInfo/alreadyLinked - dhp-propagation-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.dynamicAllocation.enabled=true - --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} - - --sourcePath${sourcePath}/software - --hive_metastore_uris${hive_metastore_uris} - --saveGraph${saveGraph} - --resultTableNameeu.dnetlib.dhp.schema.oaf.Software - --outputPath${outputPath}/relation - --datasourceOrganizationPath${workingDir}/preparedInfo/datasourceOrganization - --alreadyLinkedPath${workingDir}/preparedInfo/alreadyLinked