2020-05-11 15:14:56 +02:00
6 changed files with 186 additions and 0 deletions
--- a/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromremrel/input_orcidtoresult_parameters.json
+++ b/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromremrel/input_orcidtoresult_parameters.json
@ -0,0 +1,20 @@
+[
+  {
+    "paramName":"mt",
+    "paramLongName":"master",
+    "paramDescription": "should be local or yarn",
+    "paramRequired": true
+  },
+  {
+    "paramName":"s",
+    "paramLongName":"sourcePath",
+    "paramDescription": "the path of the sequential file to read",
+    "paramRequired": true
+  },
+  {
+    "paramName":"as",
+    "paramLongName":"allowedsemrels",
+    "paramDescription": "the allowed semantic relations for propagation",
+    "paramRequired": true
+  }
+]
--- a/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromremrel/oozie_app/config-default.xml
+++ b/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromremrel/oozie_app/config-default.xml
@ -0,0 +1,18 @@
+<configuration> <!-- Default job properties shipped with this Oozie workflow application. -->
+    <property>
+        <name>jobTracker</name>
+        <value>yarnRM</value> <!-- referenced as ${jobTracker} by the spark action in workflow.xml -->
+    </property>
+    <property>
+        <name>nameNode</name>
+        <value>hdfs://nameservice1</value> <!-- referenced as ${nameNode} by the spark action in workflow.xml -->
+    </property>
+    <property>
+        <name>oozie.use.system.libpath</name>
+        <value>true</value> <!-- make the Oozie system share library available to the job -->
+    </property>
+    <property>
+        <name>oozie.action.sharelib.for.spark</name>
+        <value>spark2</value> <!-- spark actions use the share library named "spark2" -->
+    </property>
+</configuration>
--- a/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromremrel/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromremrel/oozie_app/workflow.xml
@ -0,0 +1,55 @@
+<workflow-app name="orcid_to_result_from_semrel_propagation" xmlns="uri:oozie:workflow:0.5">
+<parameters> <!-- No defaults are declared here: every value below must be supplied at submission time. -->
+    <property>
+        <name>sourcePath</name>
+        <description>the source path</description>
+    </property>
+    <property>
+        <name>allowedsemrels</name>
+        <description>the semantic relationships allowed for propagation</description>
+    </property>
+    <property>
+        <name>sparkDriverMemory</name>
+        <description>memory for driver process</description>
+    </property>
+    <property>
+        <name>sparkExecutorMemory</name>
+        <description>memory for individual executor</description>
+    </property>
+    <property>
+        <name>sparkExecutorCores</name>
+        <description>number of cores used by single executor</description>
+    </property>
+</parameters>
+<!-- Single-action flow: start -> OrcidToResultFromSemRelPropagation -> End; an action error is routed to Kill. -->
+<start to="OrcidToResultFromSemRelPropagation"/>
+<!-- Failure terminal: the message carries the error reported by the last node that failed. -->
+<kill name="Kill">
+    <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
+</kill>
+<!-- Spark action in cluster mode; ${projectVersion} is not declared in this file (presumably filled in at build time; confirm). -->
+<action name="OrcidToResultFromSemRelPropagation">
+    <spark xmlns="uri:oozie:spark-action:0.2">
+        <job-tracker>${jobTracker}</job-tracker>
+        <name-node>${nameNode}</name-node>
+        <master>yarn-cluster</master>
+        <mode>cluster</mode>
+        <name>OrcidToResultFromSemRelPropagation</name> <!-- Spark application name; kept equal to the action name -->
+        <class>eu.dnetlib.dhp.resulttoorganizationfrominstrepo.SparkResultToOrganizationFromIstRepoJob</class> <!-- NOTE(review): class belongs to the resulttoorganizationfrominstrepo workflow and looks copy-pasted; point it at this workflow's own job class -->
+        <jar>dhp-propagation-${projectVersion}.jar</jar>
+        <spark-opts>--executor-memory ${sparkExecutorMemory}
+            --executor-cores ${sparkExecutorCores}
+            --driver-memory=${sparkDriverMemory}
+            --conf spark.extraListeners="com.cloudera.spark.lineage.NavigatorAppListener"
+            --conf spark.sql.queryExecutionListeners="com.cloudera.spark.lineage.NavigatorQueryListener"
+        </spark-opts>
+        <arg>-mt</arg> <arg>yarn-cluster</arg>
+        <arg>--sourcePath</arg><arg>${sourcePath}</arg>
+        <arg>--allowedsemrels</arg><arg>${allowedsemrels}</arg>
+    </spark>
+    <ok to="End"/>
+    <error to="Kill"/>
+</action>
+<!-- Successful completion of the workflow. -->
+<end name="End"/>
+</workflow-app>
--- a/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/input_propagationresultcommunityfromsemrel_parameters.json
+++ b/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/input_propagationresultcommunityfromsemrel_parameters.json
@ -0,0 +1,20 @@
+[
+  {
+    "paramName":"mt",
+    "paramLongName":"master",
+    "paramDescription": "should be local or yarn",
+    "paramRequired": true
+  },
+  {
+    "paramName":"s",
+    "paramLongName":"sourcePath",
+    "paramDescription": "the path of the sequential file to read",
+    "paramRequired": true
+  },
+  {
+    "paramName":"as",
+    "paramLongName":"allowedsemrels",
+    "paramDescription": "the allowed semantic relations for propagation",
+    "paramRequired": true
+  }
+]
--- a/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/oozie_app/config-default.xml
+++ b/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/oozie_app/config-default.xml
@ -0,0 +1,18 @@
+<configuration> <!-- Default job properties shipped with this Oozie workflow application. -->
+    <property>
+        <name>jobTracker</name>
+        <value>yarnRM</value> <!-- referenced as ${jobTracker} by the spark action in workflow.xml -->
+    </property>
+    <property>
+        <name>nameNode</name>
+        <value>hdfs://nameservice1</value> <!-- referenced as ${nameNode} by the spark action in workflow.xml -->
+    </property>
+    <property>
+        <name>oozie.use.system.libpath</name>
+        <value>true</value> <!-- make the Oozie system share library available to the job -->
+    </property>
+    <property>
+        <name>oozie.action.sharelib.for.spark</name>
+        <value>spark2</value> <!-- spark actions use the share library named "spark2" -->
+    </property>
+</configuration>
--- a/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/oozie_app/workflow.xml
@ -0,0 +1,55 @@
+<workflow-app name="result_to_community_from_semrel_propagation" xmlns="uri:oozie:workflow:0.5">
+<parameters> <!-- No defaults are declared here: every value below must be supplied at submission time. -->
+    <property>
+        <name>sourcePath</name>
+        <description>the source path</description>
+    </property>
+    <property>
+        <name>allowedsemrels</name>
+        <description>the semantic relationships allowed for propagation</description>
+    </property>
+    <property>
+        <name>sparkDriverMemory</name>
+        <description>memory for driver process</description>
+    </property>
+    <property>
+        <name>sparkExecutorMemory</name>
+        <description>memory for individual executor</description>
+    </property>
+    <property>
+        <name>sparkExecutorCores</name>
+        <description>number of cores used by single executor</description>
+    </property>
+</parameters>
+<!-- Single-action flow: start -> ResultToCommunityFromSemRelPropagation -> End; an action error is routed to Kill. -->
+<start to="ResultToCommunityFromSemRelPropagation"/>
+<!-- Failure terminal: the message carries the error reported by the last node that failed. -->
+<kill name="Kill">
+    <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
+</kill>
+<!-- Spark action in cluster mode; ${projectVersion} is not declared in this file (presumably filled in at build time; confirm). -->
+<action name="ResultToCommunityFromSemRelPropagation">
+    <spark xmlns="uri:oozie:spark-action:0.2">
+        <job-tracker>${jobTracker}</job-tracker>
+        <name-node>${nameNode}</name-node>
+        <master>yarn-cluster</master>
+        <mode>cluster</mode>
+        <name>ResultToCommunityFromSemRelPropagation</name> <!-- Spark application name; kept equal to the action name -->
+        <class>eu.dnetlib.dhp.resulttoorganizationfrominstrepo.SparkResultToOrganizationFromIstRepoJob</class> <!-- NOTE(review): class belongs to the resulttoorganizationfrominstrepo workflow and looks copy-pasted; point it at this workflow's own job class -->
+        <jar>dhp-propagation-${projectVersion}.jar</jar>
+        <spark-opts>--executor-memory ${sparkExecutorMemory}
+            --executor-cores ${sparkExecutorCores}
+            --driver-memory=${sparkDriverMemory}
+            --conf spark.extraListeners="com.cloudera.spark.lineage.NavigatorAppListener"
+            --conf spark.sql.queryExecutionListeners="com.cloudera.spark.lineage.NavigatorQueryListener"
+        </spark-opts>
+        <arg>-mt</arg> <arg>yarn-cluster</arg>
+        <arg>--sourcePath</arg><arg>${sourcePath}</arg>
+        <arg>--allowedsemrels</arg><arg>${allowedsemrels}</arg>
+    </spark>
+    <ok to="End"/>
+    <error to="Kill"/>
+</action>
+<!-- Successful completion of the workflow. -->
+<end name="End"/>
+</workflow-app>