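<!--
    Source file: dnet-hadoop/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionset/h2020programme/oozie_app/workflow.xml
    Listing details: 81 lines, 3.3 KiB, XML (captured from the repository web view: Raw / Normal View / History).
    Timestamp shown for the first line of the listing: 2020-05-15 18:18:31 +02:00
-->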
<workflow-app name="H2020Programme" xmlns="uri:oozie:workflow:0.5">
    <!--
        Prepares the inputs for the H2020 programme action set: recreates the output
        directory, then runs GetFile once for the projects file and once for the
        programme file.
        Node order: deleteoutputpath, get_project_file, shell_get_programme_file, End.
        Any failing action transitions to the Kill node.
        The nameNode and workingDir properties are referenced by the actions but are not
        declared as parameters here, so the job configuration has to provide them.
    -->
    <parameters>
        <property>
            <name>projectFileURL</name>
            <description>the url where to get the projects file</description>
        </property>
        <property>
            <name>programmeFileURL</name>
            <description>the url where to get the programme file</description>
        </property>
        <property>
            <name>outputPath</name>
            <description>path where to store the action set</description>
        </property>
    </parameters>

    <start to="deleteoutputpath"/>

    <!-- Terminal failure node: reports the error message of the last node that failed. -->
    <kill name="Kill">
        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>

    <!-- Start from a clean, empty output directory: remove whatever is there and recreate it. -->
    <action name="deleteoutputpath">
        <fs>
            <delete path="${outputPath}"/>
            <mkdir path="${outputPath}"/>
        </fs>
        <ok to="get_project_file"/>
        <error to="Kill"/>
    </action>

    <!--
        Runs GetFile with the projects file URL and the target projects.csv under the
        working directory. GetFile itself is not visible from this file; presumably it
        copies the content found at the URL to the given HDFS path.
        NOTE(review): neither java action sets job-tracker / name-node and the workflow has
        no global section; confirm that the Oozie server or the job configuration supplies
        defaults for them.
    -->
    <action name="get_project_file">
        <java>
            <main-class>eu.dnetlib.dhp.actionset.h2020programme.GetFile</main-class>
            <arg>--hdfsNameNode</arg>
            <arg>${nameNode}</arg>
            <arg>--fileUrl</arg>
            <arg>${projectFileURL}</arg>
            <arg>--hdfsPath</arg>
            <arg>${workingDir}/projects.csv</arg>
        </java>
        <ok to="shell_get_programme_file"/>
        <error to="Kill"/>
    </action>

    <!--
        Same GetFile invocation for the programme file, targeting programme.csv under the
        working directory. Despite the "shell_" prefix in its name this is a java action;
        the name is kept as is because node names may be referenced from outside this file.
    -->
    <action name="shell_get_programme_file">
        <java>
            <main-class>eu.dnetlib.dhp.actionset.h2020programme.GetFile</main-class>
            <arg>--hdfsNameNode</arg>
            <arg>${nameNode}</arg>
            <arg>--fileUrl</arg>
            <arg>${programmeFileURL}</arg>
            <arg>--hdfsPath</arg>
            <arg>${workingDir}/programme.csv</arg>
        </java>
        <ok to="End"/>
        <error to="Kill"/>
    </action>

    <!--
        The create_actionset Spark step below is disabled, so no active node in this
        workflow writes to outputPath after it has been recreated.
        NOTE(review): before enabling it, check the class value (it looks like a package
        name) and the arguments (sourcePath, blacklist and mergesRelation do not correspond
        to any parameter declared by this workflow).
    -->
    <!-- <action name="create_actionset">-->
    <!-- <spark xmlns="uri:oozie:spark-action:0.2">-->
    <!-- <master>yarn</master>-->
    <!-- <mode>cluster</mode>-->
    <!-- <name>ProjectProgrammeAS</name>-->
    <!-- <class>eu.dnetlib.dhp.actionset.h2020programme</class>-->
    <!-- <jar>dhp-aggregation-${projectVersion}.jar</jar>-->
    <!-- <spark-opts>-->
    <!-- &#45;&#45;executor-cores=${sparkExecutorCores}-->
    <!-- &#45;&#45;executor-memory=${sparkExecutorMemory}-->
    <!-- &#45;&#45;driver-memory=${sparkDriverMemory}-->
    <!-- &#45;&#45;conf spark.extraListeners=${spark2ExtraListeners}-->
    <!-- &#45;&#45;conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}-->
    <!-- &#45;&#45;conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}-->
    <!-- &#45;&#45;conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}-->
    <!-- &#45;&#45;conf spark.sql.shuffle.partitions=3840-->
    <!-- </spark-opts>-->
    <!-- <arg>&#45;&#45;sourcePath</arg><arg>${sourcePath}/relation</arg>-->
    <!-- <arg>&#45;&#45;outputPath</arg><arg>${outputPath}/relation</arg>-->
    <!-- <arg>&#45;&#45;hdfsPath</arg><arg>${workingDir}/blacklist</arg>-->
    <!-- <arg>&#45;&#45;mergesPath</arg><arg>${workingDir}/mergesRelation</arg>-->
    <!-- </spark>-->
    <!-- <ok to="End"/>-->
    <!-- <error to="Kill"/>-->
    <!-- </action>-->

    <end name="End"/>
</workflow-app>