From 80cf43b9c8382e98be51bcad3896fbc9013f81fd Mon Sep 17 00:00:00 2001 From: pjacewicz Date: Wed, 1 Apr 2020 18:51:25 +0200 Subject: [PATCH] [dhp-actionmanager] promoting workflow added --- .../wf/dataset/oozie_app/workflow.xml | 178 ++++++++++++ .../wf/datasource/oozie_app/workflow.xml | 125 +++++++++ .../wf/main/oozie_app/import.txt | 9 + .../wf/main/oozie_app/workflow.xml | 265 ++++++++++++++++++ .../wf/organization/oozie_app/workflow.xml | 125 +++++++++ .../oozie_app/workflow.xml | 177 ++++++++++++ .../wf/project/oozie_app/workflow.xml | 125 +++++++++ .../wf/publication/oozie_app/workflow.xml | 178 ++++++++++++ .../wf/relation/oozie_app/workflow.xml | 126 +++++++++ .../wf/software/oozie_app/workflow.xml | 177 ++++++++++++ 10 files changed, 1485 insertions(+) create mode 100644 dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/dataset/oozie_app/workflow.xml create mode 100644 dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/datasource/oozie_app/workflow.xml create mode 100644 dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/main/oozie_app/import.txt create mode 100644 dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/main/oozie_app/workflow.xml create mode 100644 dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/organization/oozie_app/workflow.xml create mode 100644 dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/otherresearchproduct/oozie_app/workflow.xml create mode 100644 dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/project/oozie_app/workflow.xml create mode 100644 dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/publication/oozie_app/workflow.xml create mode 100644 dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/relation/oozie_app/workflow.xml create mode 100644 dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/software/oozie_app/workflow.xml diff --git a/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/dataset/oozie_app/workflow.xml b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/dataset/oozie_app/workflow.xml new file mode 100644 index 000000000..faee2b0bf --- /dev/null +++ b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/dataset/oozie_app/workflow.xml @@ -0,0 +1,178 @@ + + + + activePromoteDatasetActionPayload + TODO + + + activePromoteResultActionPayload + TODO + + + inputGraphPath + TODO + + + inputActionPayloadRootPath + TODO + + + outputGraphPath + TODO + + + mergeAndGetStrategy + TODO + + + sparkDriverMemory + memory for driver process + + + sparkExecutorMemory + memory for individual executor + + + sparkExecutorCores + number of cores used by single executor + + + oozieActionShareLibForSpark2 + oozie action sharelib for spark 2.* + + + spark2ExtraListeners + com.cloudera.spark.lineage.NavigatorAppListener + spark 2.* extra listeners classname + + + spark2SqlQueryExecutionListeners + com.cloudera.spark.lineage.NavigatorQueryListener + spark 2.* sql query execution listeners classname + + + spark2YarnHistoryServerAddress + spark 2.* yarn history server address + + + spark2EventLogDir + spark 2.* event log dir location + + + + + ${jobTracker} + ${nameNode} + + + oozie.action.sharelib.for.spark + ${oozieActionShareLibForSpark2} + + + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + + ${(activePromoteDatasetActionPayload eq "true") and + (fs:exists(concat(concat(concat(concat(wf:conf('nameNode'),'/'),wf:conf('inputGraphPath')),'/'),'dataset')) eq "true") and + (fs:exists(concat(concat(concat(concat(wf:conf('nameNode'),'/'),wf:conf('inputActionPayloadRootPath')),'/'),'clazz=eu.dnetlib.dhp.schema.oaf.Dataset')) eq "true")} + + + + + + + + yarn-cluster + cluster + PromoteDatasetActionPayloadForDatasetTable + eu.dnetlib.dhp.actionmanager.promote.PromoteActionPayloadForGraphTableJob + dhp-actionmanager-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.shuffle.partitions=2560 + + --inputGraphTablePath${inputGraphPath}/dataset + --graphTableClassNameeu.dnetlib.dhp.schema.oaf.Dataset + --inputActionPayloadPath${inputActionPayloadRootPath}/clazz=eu.dnetlib.dhp.schema.oaf.Dataset + --actionPayloadClassNameeu.dnetlib.dhp.schema.oaf.Dataset + --outputGraphTablePath${workingDir}/dataset + --mergeAndGetStrategy${mergeAndGetStrategy} + + + + + + + + -pb + ${inputGraphPath}/dataset + ${workingDir}/dataset + + + + + + + + + ${(activePromoteResultActionPayload eq "true") and + (fs:exists(concat(concat(concat(concat(wf:conf('nameNode'),'/'),wf:conf('inputActionPayloadRootPath')),'/'),'clazz=eu.dnetlib.dhp.schema.oaf.Result')) eq "true")} + + + + + + + + yarn-cluster + cluster + PromoteResultActionPayloadForDatasetTable + eu.dnetlib.dhp.actionmanager.promote.PromoteActionPayloadForGraphTableJob + dhp-actionmanager-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.shuffle.partitions=2560 + + --inputGraphTablePath${workingDir}/dataset + --graphTableClassNameeu.dnetlib.dhp.schema.oaf.Dataset + --inputActionPayloadPath${inputActionPayloadRootPath}/clazz=eu.dnetlib.dhp.schema.oaf.Result + --actionPayloadClassNameeu.dnetlib.dhp.schema.oaf.Result + --outputGraphTablePath${outputGraphPath}/dataset + --mergeAndGetStrategy${mergeAndGetStrategy} + + + + + + + + -pb + ${workingDir}/dataset + ${outputGraphPath}/dataset + + + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/datasource/oozie_app/workflow.xml b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/datasource/oozie_app/workflow.xml new file mode 100644 index 000000000..545091a73 --- /dev/null +++ b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/datasource/oozie_app/workflow.xml @@ -0,0 +1,125 @@ + + + + activePromoteDatasourceActionPayload + TODO + + + inputGraphPath + TODO + + + inputActionPayloadRootPath + TODO + + + outputGraphPath + TODO + + + mergeAndGetStrategy + TODO + + + sparkDriverMemory + memory for driver process + + + sparkExecutorMemory + memory for individual executor + + + sparkExecutorCores + number of cores used by single executor + + + oozieActionShareLibForSpark2 + oozie action sharelib for spark 2.* + + + spark2ExtraListeners + com.cloudera.spark.lineage.NavigatorAppListener + spark 2.* extra listeners classname + + + spark2SqlQueryExecutionListeners + com.cloudera.spark.lineage.NavigatorQueryListener + spark 2.* sql query execution listeners classname + + + spark2YarnHistoryServerAddress + spark 2.* yarn history server address + + + spark2EventLogDir + spark 2.* event log dir location + + + + + ${jobTracker} + ${nameNode} + + + oozie.action.sharelib.for.spark + ${oozieActionShareLibForSpark2} + + + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + + ${(activePromoteDatasourceActionPayload eq "true") and + (fs:exists(concat(concat(concat(concat(wf:conf('nameNode'),'/'),wf:conf('inputGraphPath')),'/'),'datasource')) eq "true") and + (fs:exists(concat(concat(concat(concat(wf:conf('nameNode'),'/'),wf:conf('inputActionPayloadRootPath')),'/'),'clazz=eu.dnetlib.dhp.schema.oaf.Datasource')) eq "true")} + + + + + + + + yarn-cluster + cluster + PromoteDatasourceActionPayloadForDatasourceTable + eu.dnetlib.dhp.actionmanager.promote.PromoteActionPayloadForGraphTableJob + dhp-actionmanager-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + + --inputGraphTablePath${inputGraphPath}/datasource + --graphTableClassNameeu.dnetlib.dhp.schema.oaf.Datasource + --inputActionPayloadPath${inputActionPayloadRootPath}/clazz=eu.dnetlib.dhp.schema.oaf.Datasource + --actionPayloadClassNameeu.dnetlib.dhp.schema.oaf.Datasource + --outputGraphTablePath${outputGraphPath}/datasource + --mergeAndGetStrategy${mergeAndGetStrategy} + + + + + + + + -pb + ${inputGraphPath}/datasource + ${outputGraphPath}/datasource + + + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/main/oozie_app/import.txt b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/main/oozie_app/import.txt new file mode 100644 index 000000000..dd8f5e14e --- /dev/null +++ b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/main/oozie_app/import.txt @@ -0,0 +1,9 @@ +## This is a classpath-based import file (this header is required) +promote_action_payload_for_dataset_table classpath eu/dnetlib/dhp/actionmanager/wf/dataset/oozie_app +promote_action_payload_for_datasource_table classpath eu/dnetlib/dhp/actionmanager/wf/datasource/oozie_app +promote_action_payload_for_organization_table classpath eu/dnetlib/dhp/actionmanager/wf/organization/oozie_app +promote_action_payload_for_otherresearchproduct_table classpath eu/dnetlib/dhp/actionmanager/wf/otherresearchproduct/oozie_app +promote_action_payload_for_project_table classpath eu/dnetlib/dhp/actionmanager/wf/project/oozie_app +promote_action_payload_for_publication_table classpath eu/dnetlib/dhp/actionmanager/wf/publication/oozie_app +promote_action_payload_for_relation_table classpath eu/dnetlib/dhp/actionmanager/wf/relation/oozie_app +promote_action_payload_for_software_table classpath eu/dnetlib/dhp/actionmanager/wf/software/oozie_app diff --git a/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/main/oozie_app/workflow.xml b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/main/oozie_app/workflow.xml new file mode 100644 index 000000000..9150ebc06 --- /dev/null +++ b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/main/oozie_app/workflow.xml @@ -0,0 +1,265 @@ + + + + activePromoteDatasetActionPayload + TODO + + + activePromoteDatasourceActionPayload + TODO + + + activePromoteOrganizationActionPayload + TODO + + + activePromoteOtherResearchProductActionPayload + TODO + + + activePromoteProjectActionPayload + TODO + + + activePromotePublicationActionPayload + TODO + + + activePromoteRelationActionPayload + TODO + + + activePromoteResultActionPayload + TODO + + + activePromoteSoftwareActionPayload + TODO + + + inputGraphPath + TODO + + + inputActionSetPaths + TODO + + + outputGraphPath + TODO + + + mergeAndGetStrategy + TODO + + + sparkDriverMemory + memory for driver process + + + sparkExecutorMemory + memory for individual executor + + + sparkExecutorCores + number of cores used by single executor + + + oozieActionShareLibForSpark2 + oozie action sharelib for spark 2.* + + + spark2ExtraListeners + com.cloudera.spark.lineage.NavigatorAppListener + spark 2.* extra listeners classname + + + spark2SqlQueryExecutionListeners + com.cloudera.spark.lineage.NavigatorQueryListener + spark 2.* sql query execution listeners classname + + + spark2YarnHistoryServerAddress + spark 2.* yarn history server address + + + spark2EventLogDir + spark 2.* event log dir location + + + + + ${jobTracker} + ${nameNode} + + + oozie.action.sharelib.for.spark + ${oozieActionShareLibForSpark2} + + + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + yarn-cluster + cluster + PartitionActionSetsByPayloadType + eu.dnetlib.dhp.actionmanager.partition.PartitionActionSetsByPayloadTypeJob + dhp-actionmanager-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + + --inputActionSetPaths${inputActionSetPaths} + --outputPath${workingDir}/action_payload_by_type + + + + + + + + + + + + + + + + + + + ${wf:appPath()}/promote_action_payload_for_dataset_table + + + + inputActionPayloadRootPath + ${workingDir}/action_payload_by_type + + + + + + + + + + ${wf:appPath()}/promote_action_payload_for_datasource_table + + + + inputActionPayloadRootPath + ${workingDir}/action_payload_by_type + + + + + + + + + + ${wf:appPath()}/promote_action_payload_for_organization_table + + + + inputActionPayloadRootPath + ${workingDir}/action_payload_by_type + + + + + + + + + + ${wf:appPath()}/promote_action_payload_for_otherresearchproduct_table + + + + inputActionPayloadRootPath + ${workingDir}/action_payload_by_type + + + + + + + + + + ${wf:appPath()}/promote_action_payload_for_project_table + + + + inputActionPayloadRootPath + ${workingDir}/action_payload_by_type + + + + + + + + + + ${wf:appPath()}/promote_action_payload_for_publication_table + + + + inputActionPayloadRootPath + ${workingDir}/action_payload_by_type + + + + + + + + + + ${wf:appPath()}/promote_action_payload_for_relation_table + + + + inputActionPayloadRootPath + ${workingDir}/action_payload_by_type + + + + + + + + + + ${wf:appPath()}/promote_action_payload_for_software_table + + + + inputActionPayloadRootPath + ${workingDir}/action_payload_by_type + + + + + + + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/organization/oozie_app/workflow.xml b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/organization/oozie_app/workflow.xml new file mode 100644 index 000000000..d67698ce5 --- /dev/null +++ b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/organization/oozie_app/workflow.xml @@ -0,0 +1,125 @@ + + + + activePromoteOrganizationActionPayload + TODO + + + inputGraphPath + TODO + + + inputActionPayloadRootPath + TODO + + + outputGraphPath + TODO + + + mergeAndGetStrategy + TODO + + + sparkDriverMemory + memory for driver process + + + sparkExecutorMemory + memory for individual executor + + + sparkExecutorCores + number of cores used by single executor + + + oozieActionShareLibForSpark2 + oozie action sharelib for spark 2.* + + + spark2ExtraListeners + com.cloudera.spark.lineage.NavigatorAppListener + spark 2.* extra listeners classname + + + spark2SqlQueryExecutionListeners + com.cloudera.spark.lineage.NavigatorQueryListener + spark 2.* sql query execution listeners classname + + + spark2YarnHistoryServerAddress + spark 2.* yarn history server address + + + spark2EventLogDir + spark 2.* event log dir location + + + + + ${jobTracker} + ${nameNode} + + + oozie.action.sharelib.for.spark + ${oozieActionShareLibForSpark2} + + + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + + ${(activePromoteOrganizationActionPayload eq "true") and + (fs:exists(concat(concat(concat(concat(wf:conf('nameNode'),'/'),wf:conf('inputGraphPath')),'/'),'organization')) eq "true") and + (fs:exists(concat(concat(concat(concat(wf:conf('nameNode'),'/'),wf:conf('inputActionPayloadRootPath')),'/'),'clazz=eu.dnetlib.dhp.schema.oaf.Organization')) eq "true")} + + + + + + + + yarn-cluster + cluster + PromoteOrganizationActionPayloadForOrganizationTable + eu.dnetlib.dhp.actionmanager.promote.PromoteActionPayloadForGraphTableJob + dhp-actionmanager-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + + --inputGraphTablePath${inputGraphPath}/organization + --graphTableClassNameeu.dnetlib.dhp.schema.oaf.Organization + --inputActionPayloadPath${inputActionPayloadRootPath}/clazz=eu.dnetlib.dhp.schema.oaf.Organization + --actionPayloadClassNameeu.dnetlib.dhp.schema.oaf.Organization + --outputGraphTablePath${outputGraphPath}/organization + --mergeAndGetStrategy${mergeAndGetStrategy} + + + + + + + + -pb + ${inputGraphPath}/organization + ${outputGraphPath}/organization + + + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/otherresearchproduct/oozie_app/workflow.xml b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/otherresearchproduct/oozie_app/workflow.xml new file mode 100644 index 000000000..66b51ea75 --- /dev/null +++ b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/otherresearchproduct/oozie_app/workflow.xml @@ -0,0 +1,177 @@ + + + + activePromoteOtherResearchProductActionPayload + TODO + + + activePromoteResultActionPayload + TODO + + + inputGraphPath + TODO + + + inputActionPayloadRootPath + TODO + + + outputGraphPath + TODO + + + mergeAndGetStrategy + TODO + + + sparkDriverMemory + memory for driver process + + + sparkExecutorMemory + memory for individual executor + + + sparkExecutorCores + number of cores used by single executor + + + oozieActionShareLibForSpark2 + oozie action sharelib for spark 2.* + + + spark2ExtraListeners + com.cloudera.spark.lineage.NavigatorAppListener + spark 2.* extra listeners classname + + + spark2SqlQueryExecutionListeners + com.cloudera.spark.lineage.NavigatorQueryListener + spark 2.* sql query execution listeners classname + + + spark2YarnHistoryServerAddress + spark 2.* yarn history server address + + + spark2EventLogDir + spark 2.* event log dir location + + + + + ${jobTracker} + ${nameNode} + + + oozie.action.sharelib.for.spark + ${oozieActionShareLibForSpark2} + + + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + + ${(activePromoteOtherResearchProductActionPayload eq "true") and + (fs:exists(concat(concat(concat(concat(wf:conf('nameNode'),'/'),wf:conf('inputGraphPath')),'/'),'otherresearchproduct')) eq "true") and + (fs:exists(concat(concat(concat(concat(wf:conf('nameNode'),'/'),wf:conf('inputActionPayloadRootPath')),'/'),'clazz=eu.dnetlib.dhp.schema.oaf.OtherResearchProduct')) eq "true")} + + + + + + + + yarn-cluster + cluster + PromoteOtherResearchProductActionPayloadForOtherResearchProductTable + eu.dnetlib.dhp.actionmanager.promote.PromoteActionPayloadForGraphTableJob + dhp-actionmanager-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + + --inputGraphTablePath${inputGraphPath}/otherresearchproduct + --graphTableClassNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct + --inputActionPayloadPath${inputActionPayloadRootPath}/clazz=eu.dnetlib.dhp.schema.oaf.OtherResearchProduct + --actionPayloadClassNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct + --outputGraphTablePath${workingDir}/otherresearchproduct + --mergeAndGetStrategy${mergeAndGetStrategy} + + + + + + + + -pb + ${inputGraphPath}/otherresearchproduct + ${workingDir}/otherresearchproduct + + + + + + + + + ${(activePromoteResultActionPayload eq "true") and + (fs:exists(concat(concat(concat(concat(wf:conf('nameNode'),'/'),wf:conf('inputActionPayloadRootPath')),'/'),'clazz=eu.dnetlib.dhp.schema.oaf.Result')) eq "true")} + + + + + + + + yarn-cluster + cluster + PromoteResultActionPayloadForOtherResearchProductTable + eu.dnetlib.dhp.actionmanager.promote.PromoteActionPayloadForGraphTableJob + dhp-actionmanager-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.shuffle.partitions=2560 + + --inputGraphTablePath${workingDir}/otherresearchproduct + --graphTableClassNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct + --inputActionPayloadPath${inputActionPayloadRootPath}/clazz=eu.dnetlib.dhp.schema.oaf.Result + --actionPayloadClassNameeu.dnetlib.dhp.schema.oaf.Result + --outputGraphTablePath${outputGraphPath}/otherresearchproduct + --mergeAndGetStrategy${mergeAndGetStrategy} + + + + + + + + -pb + ${workingDir}/otherresearchproduct + ${outputGraphPath}/otherresearchproduct + + + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/project/oozie_app/workflow.xml b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/project/oozie_app/workflow.xml new file mode 100644 index 000000000..a84b35e14 --- /dev/null +++ b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/project/oozie_app/workflow.xml @@ -0,0 +1,125 @@ + + + + activePromoteProjectActionPayload + TODO + + + inputGraphPath + TODO + + + inputActionPayloadRootPath + TODO + + + outputGraphPath + TODO + + + mergeAndGetStrategy + TODO + + + sparkDriverMemory + memory for driver process + + + sparkExecutorMemory + memory for individual executor + + + sparkExecutorCores + number of cores used by single executor + + + oozieActionShareLibForSpark2 + oozie action sharelib for spark 2.* + + + spark2ExtraListeners + com.cloudera.spark.lineage.NavigatorAppListener + spark 2.* extra listeners classname + + + spark2SqlQueryExecutionListeners + com.cloudera.spark.lineage.NavigatorQueryListener + spark 2.* sql query execution listeners classname + + + spark2YarnHistoryServerAddress + spark 2.* yarn history server address + + + spark2EventLogDir + spark 2.* event log dir location + + + + + ${jobTracker} + ${nameNode} + + + oozie.action.sharelib.for.spark + ${oozieActionShareLibForSpark2} + + + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + + ${(activePromoteProjectActionPayload eq "true") and + (fs:exists(concat(concat(concat(concat(wf:conf('nameNode'),'/'),wf:conf('inputGraphPath')),'/'),'project')) eq "true") and + (fs:exists(concat(concat(concat(concat(wf:conf('nameNode'),'/'),wf:conf('inputActionPayloadRootPath')),'/'),'clazz=eu.dnetlib.dhp.schema.oaf.Project')) eq "true")} + + + + + + + + yarn-cluster + cluster + PromoteProjectActionPayloadForProjectTable + eu.dnetlib.dhp.actionmanager.promote.PromoteActionPayloadForGraphTableJob + dhp-actionmanager-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + + --inputGraphTablePath${inputGraphPath}/project + --graphTableClassNameeu.dnetlib.dhp.schema.oaf.Project + --inputActionPayloadPath${inputActionPayloadRootPath}/clazz=eu.dnetlib.dhp.schema.oaf.Project + --actionPayloadClassNameeu.dnetlib.dhp.schema.oaf.Project + --outputGraphTablePath${outputGraphPath}/project + --mergeAndGetStrategy${mergeAndGetStrategy} + + + + + + + + -pb + ${inputGraphPath}/project + ${outputGraphPath}/project + + + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/publication/oozie_app/workflow.xml b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/publication/oozie_app/workflow.xml new file mode 100644 index 000000000..229909dd6 --- /dev/null +++ b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/publication/oozie_app/workflow.xml @@ -0,0 +1,178 @@ + + + + activePromotePublicationActionPayload + TODO + + + activePromoteResultActionPayload + TODO + + + inputGraphPath + TODO + + + inputActionPayloadRootPath + TODO + + + outputGraphPath + TODO + + + mergeAndGetStrategy + TODO + + + sparkDriverMemory + memory for driver process + + + sparkExecutorMemory + memory for individual executor + + + sparkExecutorCores + number of cores used by single executor + + + oozieActionShareLibForSpark2 + oozie action sharelib for spark 2.* + + + spark2ExtraListeners + com.cloudera.spark.lineage.NavigatorAppListener + spark 2.* extra listeners classname + + + spark2SqlQueryExecutionListeners + com.cloudera.spark.lineage.NavigatorQueryListener + spark 2.* sql query execution listeners classname + + + spark2YarnHistoryServerAddress + spark 2.* yarn history server address + + + spark2EventLogDir + spark 2.* event log dir location + + + + + ${jobTracker} + ${nameNode} + + + oozie.action.sharelib.for.spark + ${oozieActionShareLibForSpark2} + + + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + + ${(activePromotePublicationActionPayload eq "true") and + (fs:exists(concat(concat(concat(concat(wf:conf('nameNode'),'/'),wf:conf('inputGraphPath')),'/'),'publication')) eq "true") and + (fs:exists(concat(concat(concat(concat(wf:conf('nameNode'),'/'),wf:conf('inputActionPayloadRootPath')),'/'),'clazz=eu.dnetlib.dhp.schema.oaf.Publication')) eq "true")} + + + + + + + + yarn-cluster + cluster + PromotePublicationActionPayloadForPublicationTable + eu.dnetlib.dhp.actionmanager.promote.PromoteActionPayloadForGraphTableJob + dhp-actionmanager-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.shuffle.partitions=2560 + + --inputGraphTablePath${inputGraphPath}/publication + --graphTableClassNameeu.dnetlib.dhp.schema.oaf.Publication + --inputActionPayloadPath${inputActionPayloadRootPath}/clazz=eu.dnetlib.dhp.schema.oaf.Publication + --actionPayloadClassNameeu.dnetlib.dhp.schema.oaf.Publication + --outputGraphTablePath${workingDir}/publication + --mergeAndGetStrategy${mergeAndGetStrategy} + + + + + + + + -pb + ${inputGraphPath}/publication + ${workingDir}/publication + + + + + + + + + ${(activePromoteResultActionPayload eq "true") and + (fs:exists(concat(concat(concat(concat(wf:conf('nameNode'),'/'),wf:conf('inputActionPayloadRootPath')),'/'),'clazz=eu.dnetlib.dhp.schema.oaf.Result')) eq "true")} + + + + + + + + yarn-cluster + cluster + PromoteResultActionPayloadForPublicationTable + eu.dnetlib.dhp.actionmanager.promote.PromoteActionPayloadForGraphTableJob + dhp-actionmanager-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.shuffle.partitions=2560 + + --inputGraphTablePath${workingDir}/publication + --graphTableClassNameeu.dnetlib.dhp.schema.oaf.Publication + --inputActionPayloadPath${inputActionPayloadRootPath}/clazz=eu.dnetlib.dhp.schema.oaf.Result + --actionPayloadClassNameeu.dnetlib.dhp.schema.oaf.Result + --outputGraphTablePath${outputGraphPath}/publication + --mergeAndGetStrategy${mergeAndGetStrategy} + + + + + + + + -pb + ${workingDir}/publication + ${outputGraphPath}/publication + + + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/relation/oozie_app/workflow.xml b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/relation/oozie_app/workflow.xml new file mode 100644 index 000000000..6059c024c --- /dev/null +++ b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/relation/oozie_app/workflow.xml @@ -0,0 +1,126 @@ + + + + activePromoteRelationActionPayload + TODO + + + inputGraphPath + TODO + + + inputActionPayloadRootPath + TODO + + + outputGraphPath + TODO + + + mergeAndGetStrategy + TODO + + + sparkDriverMemory + memory for driver process + + + sparkExecutorMemory + memory for individual executor + + + sparkExecutorCores + number of cores used by single executor + + + oozieActionShareLibForSpark2 + oozie action sharelib for spark 2.* + + + spark2ExtraListeners + com.cloudera.spark.lineage.NavigatorAppListener + spark 2.* extra listeners classname + + + spark2SqlQueryExecutionListeners + com.cloudera.spark.lineage.NavigatorQueryListener + spark 2.* sql query execution listeners classname + + + spark2YarnHistoryServerAddress + spark 2.* yarn history server address + + + spark2EventLogDir + spark 2.* event log dir location + + + + + ${jobTracker} + ${nameNode} + + + oozie.action.sharelib.for.spark + ${oozieActionShareLibForSpark2} + + + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + + ${(activePromoteRelationActionPayload eq "true") and + (fs:exists(concat(concat(concat(concat(wf:conf('nameNode'),'/'),wf:conf('inputGraphPath')),'/'),'relation')) eq "true") and + (fs:exists(concat(concat(concat(concat(wf:conf('nameNode'),'/'),wf:conf('inputActionPayloadRootPath')),'/'),'clazz=eu.dnetlib.dhp.schema.oaf.Relation')) eq "true")} + + + + + + + + yarn-cluster + cluster + PromoteRelationActionPayloadForRelationTable + eu.dnetlib.dhp.actionmanager.promote.PromoteActionPayloadForGraphTableJob + dhp-actionmanager-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.shuffle.partitions=2560 + + --inputGraphTablePath${inputGraphPath}/relation + --graphTableClassNameeu.dnetlib.dhp.schema.oaf.Relation + --inputActionPayloadPath${inputActionPayloadRootPath}/clazz=eu.dnetlib.dhp.schema.oaf.Relation + --actionPayloadClassNameeu.dnetlib.dhp.schema.oaf.Relation + --outputGraphTablePath${outputGraphPath}/relation + --mergeAndGetStrategy${mergeAndGetStrategy} + + + + + + + + -pb + ${inputGraphPath}/relation + ${outputGraphPath}/relation + + + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/software/oozie_app/workflow.xml b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/software/oozie_app/workflow.xml new file mode 100644 index 000000000..68167524d --- /dev/null +++ b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/software/oozie_app/workflow.xml @@ -0,0 +1,177 @@ + + + + activePromoteSoftwareActionPayload + TODO + + + activePromoteResultActionPayload + TODO + + + inputGraphPath + TODO + + + inputActionPayloadRootPath + TODO + + + outputGraphPath + TODO + + + mergeAndGetStrategy + TODO + + + sparkDriverMemory + memory for driver process + + + sparkExecutorMemory + memory for individual executor + + + sparkExecutorCores + number of cores used by single executor + + + oozieActionShareLibForSpark2 + oozie action sharelib for spark 2.* + + + spark2ExtraListeners + com.cloudera.spark.lineage.NavigatorAppListener + spark 2.* extra listeners classname + + + spark2SqlQueryExecutionListeners + com.cloudera.spark.lineage.NavigatorQueryListener + spark 2.* sql query execution listeners classname + + + spark2YarnHistoryServerAddress + spark 2.* yarn history server address + + + spark2EventLogDir + spark 2.* event log dir location + + + + + ${jobTracker} + ${nameNode} + + + oozie.action.sharelib.for.spark + ${oozieActionShareLibForSpark2} + + + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + + ${(activePromoteSoftwareActionPayload eq "true") and + (fs:exists(concat(concat(concat(concat(wf:conf('nameNode'),'/'),wf:conf('inputGraphPath')),'/'),'software')) eq "true") and + (fs:exists(concat(concat(concat(concat(wf:conf('nameNode'),'/'),wf:conf('inputActionPayloadRootPath')),'/'),'clazz=eu.dnetlib.dhp.schema.oaf.Software')) eq "true")} + + + + + + + + yarn-cluster + cluster + PromoteSoftwareActionPayloadForSoftwareTable + eu.dnetlib.dhp.actionmanager.promote.PromoteActionPayloadForGraphTableJob + dhp-actionmanager-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + + --inputGraphTablePath${inputGraphPath}/software + --graphTableClassNameeu.dnetlib.dhp.schema.oaf.Software + --inputActionPayloadPath${inputActionPayloadRootPath}/clazz=eu.dnetlib.dhp.schema.oaf.Software + --actionPayloadClassNameeu.dnetlib.dhp.schema.oaf.Software + --outputGraphTablePath${workingDir}/software + --mergeAndGetStrategy${mergeAndGetStrategy} + + + + + + + + -pb + ${inputGraphPath}/software + ${workingDir}/software + + + + + + + + + ${(activePromoteResultActionPayload eq "true") and + (fs:exists(concat(concat(concat(concat(wf:conf('nameNode'),'/'),wf:conf('inputActionPayloadRootPath')),'/'),'clazz=eu.dnetlib.dhp.schema.oaf.Result')) eq "true")} + + + + + + + + yarn-cluster + cluster + PromoteResultActionPayloadForSoftwareTable + eu.dnetlib.dhp.actionmanager.promote.PromoteActionPayloadForGraphTableJob + dhp-actionmanager-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.shuffle.partitions=2560 + + --inputGraphTablePath${workingDir}/software + --graphTableClassNameeu.dnetlib.dhp.schema.oaf.Software + --inputActionPayloadPath${inputActionPayloadRootPath}/clazz=eu.dnetlib.dhp.schema.oaf.Result + --actionPayloadClassNameeu.dnetlib.dhp.schema.oaf.Result + --outputGraphTablePath${outputGraphPath}/software + --mergeAndGetStrategy${mergeAndGetStrategy} + + + + + + + + -pb + ${workingDir}/software + ${outputGraphPath}/software + + + + + + + \ No newline at end of file