<?xml version="1.0" encoding="UTF-8"?>
<!--
  Oozie workflow "Graph Stats".

  Node order: Step1, Step2, ..., Step16, Step16_5, Step16_6, Step17, Step18,
  Step19, End. Every action transitions to the next one on success and to the
  "Kill" node on error.

  Step1 to Step17 are Hive2 actions that run scripts/stepN.sql through
  ${hive_jdbc_url}. Step18 and Step19 are shell actions that execute
  impala-shell.sh.
-->
<workflow-app name="Graph Stats" xmlns="uri:oozie:workflow:0.5">

    <!--
      Workflow parameters. Only external_stats_db_name declares a default
      value; the others have no <value> here and are expected to be supplied
      at submission time.
    -->
    <parameters>
        <property>
            <name>stats_db_name</name>
            <description>the target stats database name</description>
        </property>
        <property>
            <name>openaire_db_name</name>
            <description>the original graph database name</description>
        </property>
        <property>
            <name>external_stats_db_name</name>
            <value>stats_ext</value>
            <description>the external stats that should be added since they are not included in the graph database</description>
        </property>
        <property>
            <name>stats_db_shadow_name</name>
            <description>the name of the shadow schema</description>
        </property>
        <property>
            <name>hive_metastore_uris</name>
            <description>hive server metastore URIs</description>
        </property>
        <property>
            <name>hive_jdbc_url</name>
            <description>hive server jdbc url</description>
        </property>
        <property>
            <name>hive_timeout</name>
            <description>the time period, in seconds, after which Hive fails a transaction if a Hive client has not sent a heartbeat. The default value is 300 seconds.</description>
        </property>
    </parameters>

    <!--
      Settings shared by the actions: job tracker, name node, and two Hive
      properties taken from the hive_metastore_uris and hive_timeout
      parameters.
    -->
    <global>
        <job-tracker>${jobTracker}</job-tracker>
        <name-node>${nameNode}</name-node>
        <configuration>
            <property>
                <name>hive.metastore.uris</name>
                <value>${hive_metastore_uris}</value>
            </property>
            <property>
                <name>hive.txn.timeout</name>
                <value>${hive_timeout}</value>
            </property>
        </configuration>
    </global>

    <!-- Entry point of the workflow. -->
    <start to="Step1"/>

    <!-- Failure sink: every action's <error> transition points here. -->
    <kill name="Kill">
        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>

    <!--
      Step1 to Step9: each runs its own scripts/stepN.sql and passes
      stats_db_name and openaire_db_name as script parameters.
    -->
    <action name="Step1">
        <hive2 xmlns="uri:oozie:hive2-action:0.1">
            <jdbc-url>${hive_jdbc_url}</jdbc-url>
            <script>scripts/step1.sql</script>
            <param>stats_db_name=${stats_db_name}</param>
            <param>openaire_db_name=${openaire_db_name}</param>
        </hive2>
        <ok to="Step2"/>
        <error to="Kill"/>
    </action>

    <action name="Step2">
        <hive2 xmlns="uri:oozie:hive2-action:0.1">
            <jdbc-url>${hive_jdbc_url}</jdbc-url>
            <script>scripts/step2.sql</script>
            <param>stats_db_name=${stats_db_name}</param>
            <param>openaire_db_name=${openaire_db_name}</param>
        </hive2>
        <ok to="Step3"/>
        <error to="Kill"/>
    </action>

    <action name="Step3">
        <hive2 xmlns="uri:oozie:hive2-action:0.1">
            <jdbc-url>${hive_jdbc_url}</jdbc-url>
            <script>scripts/step3.sql</script>
            <param>stats_db_name=${stats_db_name}</param>
            <param>openaire_db_name=${openaire_db_name}</param>
        </hive2>
        <ok to="Step4"/>
        <error to="Kill"/>
    </action>

    <action name="Step4">
        <hive2 xmlns="uri:oozie:hive2-action:0.1">
            <jdbc-url>${hive_jdbc_url}</jdbc-url>
            <script>scripts/step4.sql</script>
            <param>stats_db_name=${stats_db_name}</param>
            <param>openaire_db_name=${openaire_db_name}</param>
        </hive2>
        <ok to="Step5"/>
        <error to="Kill"/>
    </action>

    <action name="Step5">
        <hive2 xmlns="uri:oozie:hive2-action:0.1">
            <jdbc-url>${hive_jdbc_url}</jdbc-url>
            <script>scripts/step5.sql</script>
            <param>stats_db_name=${stats_db_name}</param>
            <param>openaire_db_name=${openaire_db_name}</param>
        </hive2>
        <ok to="Step6"/>
        <error to="Kill"/>
    </action>

    <action name="Step6">
        <hive2 xmlns="uri:oozie:hive2-action:0.1">
            <jdbc-url>${hive_jdbc_url}</jdbc-url>
            <script>scripts/step6.sql</script>
            <param>stats_db_name=${stats_db_name}</param>
            <param>openaire_db_name=${openaire_db_name}</param>
        </hive2>
        <ok to="Step7"/>
        <error to="Kill"/>
    </action>

    <action name="Step7">
        <hive2 xmlns="uri:oozie:hive2-action:0.1">
            <jdbc-url>${hive_jdbc_url}</jdbc-url>
            <script>scripts/step7.sql</script>
            <param>stats_db_name=${stats_db_name}</param>
            <param>openaire_db_name=${openaire_db_name}</param>
        </hive2>
        <ok to="Step8"/>
        <error to="Kill"/>
    </action>

    <action name="Step8">
        <hive2 xmlns="uri:oozie:hive2-action:0.1">
            <jdbc-url>${hive_jdbc_url}</jdbc-url>
            <script>scripts/step8.sql</script>
            <param>stats_db_name=${stats_db_name}</param>
            <param>openaire_db_name=${openaire_db_name}</param>
        </hive2>
        <ok to="Step9"/>
        <error to="Kill"/>
    </action>

    <action name="Step9">
        <hive2 xmlns="uri:oozie:hive2-action:0.1">
            <jdbc-url>${hive_jdbc_url}</jdbc-url>
            <script>scripts/step9.sql</script>
            <param>stats_db_name=${stats_db_name}</param>
            <param>openaire_db_name=${openaire_db_name}</param>
        </hive2>
        <ok to="Step10"/>
        <error to="Kill"/>
    </action>

    <!--
      Step10 and Step11: in addition to the two database names, these also
      pass external_stats_db_name to their scripts.
    -->
    <action name="Step10">
        <hive2 xmlns="uri:oozie:hive2-action:0.1">
            <jdbc-url>${hive_jdbc_url}</jdbc-url>
            <script>scripts/step10.sql</script>
            <param>stats_db_name=${stats_db_name}</param>
            <param>openaire_db_name=${openaire_db_name}</param>
            <param>external_stats_db_name=${external_stats_db_name}</param>
        </hive2>
        <ok to="Step11"/>
        <error to="Kill"/>
    </action>

    <action name="Step11">
        <hive2 xmlns="uri:oozie:hive2-action:0.1">
            <jdbc-url>${hive_jdbc_url}</jdbc-url>
            <script>scripts/step11.sql</script>
            <param>stats_db_name=${stats_db_name}</param>
            <param>openaire_db_name=${openaire_db_name}</param>
            <param>external_stats_db_name=${external_stats_db_name}</param>
        </hive2>
        <ok to="Step12"/>
        <error to="Kill"/>
    </action>

    <!--
      Step12 to Step16_6: back to the two-parameter form (stats_db_name and
      openaire_db_name).
    -->
    <action name="Step12">
        <hive2 xmlns="uri:oozie:hive2-action:0.1">
            <jdbc-url>${hive_jdbc_url}</jdbc-url>
            <script>scripts/step12.sql</script>
            <param>stats_db_name=${stats_db_name}</param>
            <param>openaire_db_name=${openaire_db_name}</param>
        </hive2>
        <ok to="Step13"/>
        <error to="Kill"/>
    </action>

    <action name="Step13">
        <hive2 xmlns="uri:oozie:hive2-action:0.1">
            <jdbc-url>${hive_jdbc_url}</jdbc-url>
            <script>scripts/step13.sql</script>
            <param>stats_db_name=${stats_db_name}</param>
            <param>openaire_db_name=${openaire_db_name}</param>
        </hive2>
        <ok to="Step14"/>
        <error to="Kill"/>
    </action>

    <action name="Step14">
        <hive2 xmlns="uri:oozie:hive2-action:0.1">
            <jdbc-url>${hive_jdbc_url}</jdbc-url>
            <script>scripts/step14.sql</script>
            <param>stats_db_name=${stats_db_name}</param>
            <param>openaire_db_name=${openaire_db_name}</param>
        </hive2>
        <ok to="Step15"/>
        <error to="Kill"/>
    </action>

    <action name="Step15">
        <hive2 xmlns="uri:oozie:hive2-action:0.1">
            <jdbc-url>${hive_jdbc_url}</jdbc-url>
            <script>scripts/step15.sql</script>
            <param>stats_db_name=${stats_db_name}</param>
            <param>openaire_db_name=${openaire_db_name}</param>
        </hive2>
        <ok to="Step16"/>
        <error to="Kill"/>
    </action>

    <action name="Step16">
        <hive2 xmlns="uri:oozie:hive2-action:0.1">
            <jdbc-url>${hive_jdbc_url}</jdbc-url>
            <script>scripts/step16.sql</script>
            <param>stats_db_name=${stats_db_name}</param>
            <param>openaire_db_name=${openaire_db_name}</param>
        </hive2>
        <ok to="Step16_5"/>
        <error to="Kill"/>
    </action>

    <action name="Step16_5">
        <hive2 xmlns="uri:oozie:hive2-action:0.1">
            <jdbc-url>${hive_jdbc_url}</jdbc-url>
            <script>scripts/step16_5.sql</script>
            <param>stats_db_name=${stats_db_name}</param>
            <param>openaire_db_name=${openaire_db_name}</param>
        </hive2>
        <ok to="Step16_6"/>
        <error to="Kill"/>
    </action>

    <action name="Step16_6">
        <hive2 xmlns="uri:oozie:hive2-action:0.1">
            <jdbc-url>${hive_jdbc_url}</jdbc-url>
            <script>scripts/step16_6.sql</script>
            <param>stats_db_name=${stats_db_name}</param>
            <param>openaire_db_name=${openaire_db_name}</param>
        </hive2>
        <ok to="Step17"/>
        <error to="Kill"/>
    </action>

    <!--
      Step17: last Hive2 action. It passes stats_db_name and
      stats_db_shadow_name; openaire_db_name is not passed here.
    -->
    <action name="Step17">
        <hive2 xmlns="uri:oozie:hive2-action:0.1">
            <jdbc-url>${hive_jdbc_url}</jdbc-url>
            <script>scripts/step17.sql</script>
            <param>stats_db_name=${stats_db_name}</param>
            <param>stats_db_shadow_name=${stats_db_shadow_name}</param>
        </hive2>
        <ok to="Step18"/>
        <error to="Kill"/>
    </action>

    <!--
      Step18 and Step19: shell actions that execute impala-shell.sh (shipped
      with the action via <file>). The three arguments are, in order: a
      database name, the script file name, and the script location under the
      workflow application path. Step18 passes stats_db_name, Step19 passes
      stats_db_shadow_name. How the script uses them is defined in
      impala-shell.sh, which is not part of this file.
    -->
    <action name="Step18">
        <shell xmlns="uri:oozie:shell-action:0.1">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <exec>impala-shell.sh</exec>
            <argument>${stats_db_name}</argument>
            <argument>step18.sql</argument>
            <argument>${wf:appPath()}/scripts/step18.sql</argument>
            <file>impala-shell.sh</file>
        </shell>
        <ok to="Step19"/>
        <error to="Kill"/>
    </action>

    <action name="Step19">
        <shell xmlns="uri:oozie:shell-action:0.1">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <exec>impala-shell.sh</exec>
            <argument>${stats_db_shadow_name}</argument>
            <argument>step19.sql</argument>
            <argument>${wf:appPath()}/scripts/step19.sql</argument>
            <file>impala-shell.sh</file>
        </shell>
        <ok to="End"/>
        <error to="Kill"/>
    </action>

    <!-- Successful completion of the workflow. -->
    <end name="End"/>
</workflow-app>