dnet-hadoop/dhp-workflows/dhp-stats-hist-snaps/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-hist-snaps/oozie_app/workflow.xml

160 lines
6.0 KiB
XML

<workflow-app name="Stats Hist Snapshots" xmlns="uri:oozie:workflow:0.5">
<parameters>
<property>
<name>hist_db_name</name>
<description>the target hist database name</description>
</property>
<property>
<name>hist_db_name_prev</name>
<description>the hist database name of previous_month</description>
</property>
<property>
<name>stats_db_name</name>
<description>the stats db name</description>
</property>
<property>
<name>stats_irish_db_name</name>
<description>the stats irish db name</description>
</property>
<property>
<name>monitor_db_name</name>
<description>the monitor db name</description>
</property>
<property>
<name>monitor_irish_db_name</name>
<description>the irish monitor db name</description>
</property>
<property>
<name>hist_db_prod_name</name>
<description>the production db</description>
</property>
<property>
<name>hist_db_shadow_name</name>
<description>the production shadow db</description>
</property>
<property>
<name>hist_date</name>
<description>the snaps date</description>
</property>
<property>
<name>hive_metastore_uris</name>
<description>hive server metastore URIs</description>
</property>
<property>
<name>hive_jdbc_url</name>
<description>hive server jdbc url</description>
</property>
<property>
<name>hive_timeout</name>
<description>the time period, in seconds, after which Hive fails a transaction if a Hive client has not sent a hearbeat. The default value is 300 seconds.</description>
</property>
<property>
<name>hadoop_user_name</name>
<description>user name of the wf owner</description>
</property>
</parameters>
<global>
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<configuration>
<property>
<name>hive.metastore.uris</name>
<value>${hive_metastore_uris}</value>
</property>
<property>
<name>hive.txn.timeout</name>
<value>${hive_timeout}</value>
</property>
<property>
<name>mapred.job.queue.name</name>
<value>analytics</value>
</property>
</configuration>
</global>
<start to="resume_from"/>
<decision name="resume_from">
<switch>
<case to="CreateDB">${wf:conf('resumeFrom') eq 'CreateDB'}</case>
<case to="BuildHistSnaps">${wf:conf('resumeFrom') eq 'BuildHistSnaps'}</case>
<case to="BuildHistSnapsIrish">${wf:conf('resumeFrom') eq 'BuildHistSnapsIrish'}</case>
<case to="Step2-copyDataToImpalaCluster">${wf:conf('resumeFrom') eq 'Step2-copyDataToImpalaCluster'}</case>
<case to="Step3-finalizeImpalaCluster">${wf:conf('resumeFrom') eq 'Step3-finalizeImpalaCluster'}</case>
<default to="BuildHistSnaps"/>
</switch>
</decision>
<kill name="Kill">
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
</kill>
<action name="CreateDB">
<hive2 xmlns="uri:oozie:hive2-action:0.1">
<jdbc-url>${hive_jdbc_url}</jdbc-url>
<script>scripts/CreateDB.sql</script>
<param>hist_db_name=${hist_db_name}</param>
</hive2>
<ok to="BuildHistSnaps"/>
<error to="Kill"/>
</action>
<action name="BuildHistSnaps">
<hive2 xmlns="uri:oozie:hive2-action:0.1">
<jdbc-url>${hive_jdbc_url}</jdbc-url>
<script>scripts/BuildHistSnapsAll.sql</script>
<param>hist_db_name=${hist_db_name}</param>
<param>hist_db_name_prev=${hist_db_name_prev}</param>
<param>stats_db_name=${stats_db_name}</param>
<param>monitor_db_name=${monitor_db_name}</param>
<param>hist_date=${hist_date}</param>
</hive2>
<ok to="BuildHistSnapsIrish"/>
<error to="Kill"/>
</action>
<action name="BuildHistSnapsIrish">
<hive2 xmlns="uri:oozie:hive2-action:0.1">
<jdbc-url>${hive_jdbc_url}</jdbc-url>
<script>scripts/BuildHistSnapsIrish.sql</script>
<param>hist_db_name=${hist_db_name}</param>
<param>hist_db_name_prev=${hist_db_name_prev}</param>
<param>stats_irish_db_name=${stats_irish_db_name}</param>
<param>monitor_irish_db_name=${monitor_irish_db_name}</param>
<param>hist_date=${hist_date}</param>
</hive2>
<ok to="Step2-copyDataToImpalaCluster"/>
<error to="Kill"/>
</action>
<action name="Step2-copyDataToImpalaCluster">
<shell xmlns="uri:oozie:shell-action:0.1">
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<exec>copyDataToImpalaCluster.sh</exec>
<argument>${hist_db_name}</argument>
<argument>${hadoop_user_name}</argument>
<file>copyDataToImpalaCluster.sh</file>
</shell>
<ok to="Step3-finalizeImpalaCluster"/>
<error to="Kill"/>
</action>
<action name="Step3-finalizeImpalaCluster">
<shell xmlns="uri:oozie:shell-action:0.1">
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<exec>finalizeImpalaCluster.sh</exec>
<argument>${hist_db_name}</argument>
<argument>${hist_db_prod_name}</argument>
<argument>${hist_db_shadow_name}</argument>
<argument>${monitor_db_prod_name}</argument>
<argument>${monitor_irish_db_prod_name}</argument>
<file>finalizeImpalaCluster.sh</file>
</shell>
<ok to="End"/>
<error to="Kill"/>
</action>
<end name="End"/>
</workflow-app>