<?xml version="1.0" encoding="UTF-8"?>
<workflow-app name= "Graph Stats" xmlns= "uri:oozie:workflow:0.5" >
<!--
    Workflow input parameters.
    Only external_stats_db_name declares a default value here; every other
    parameter has no <value> and must be supplied by the job configuration.
-->
<parameters>
    <property>
        <name>stats_db_name</name>
        <description>the target stats database name</description>
    </property>
    <property>
        <name>openaire_db_name</name>
        <description>the original graph database name</description>
    </property>
    <property>
        <name>external_stats_db_name</name>
        <value>stats_ext</value>
        <description>the external stats that should be added since they are not included in the graph database</description>
    </property>
    <property>
        <name>stats_db_shadow_name</name>
        <description>the name of the shadow schema</description>
    </property>
    <property>
        <name>monitor_db_name</name>
        <description>the target monitor db name</description>
    </property>
    <property>
        <name>monitor_db_shadow_name</name>
        <description>the name of the shadow monitor db</description>
    </property>
    <property>
        <name>observatory_db_name</name>
        <description>the target observatory db name</description>
    </property>
    <property>
        <name>observatory_db_shadow_name</name>
        <description>the name of the shadow observatory db</description>
    </property>
    <property>
        <name>stats_tool_api_url</name>
        <description>The url of the API of the stats tool. Is used to trigger the cache update.</description>
    </property>
    <property>
        <name>hive_metastore_uris</name>
        <description>hive server metastore URIs</description>
    </property>
    <property>
        <name>hive_jdbc_url</name>
        <description>hive server jdbc url</description>
    </property>
    <property>
        <name>hive_timeout</name>
        <description>the time period, in seconds, after which Hive fails a transaction if a Hive client has not sent a heartbeat. The default value is 300 seconds.</description>
    </property>
    <property>
        <name>context_api_url</name>
        <description>the base url of the context api (https://services.openaire.eu/openaire)</description>
    </property>
</parameters>
<!--
    Settings shared by the actions of this workflow: the cluster endpoints
    and the Hive metastore URIs / transaction timeout, both taken from the
    workflow parameters hive_metastore_uris and hive_timeout.
-->
<global>
    <job-tracker>${jobTracker}</job-tracker>
    <name-node>${nameNode}</name-node>
    <configuration>
        <property>
            <name>hive.metastore.uris</name>
            <value>${hive_metastore_uris}</value>
        </property>
        <property>
            <name>hive.txn.timeout</name>
            <value>${hive_timeout}</value>
        </property>
    </configuration>
</global>
<!-- Entry point: the workflow begins with the Step1 action. -->
<start to="Step1"/>
<!-- Failure sink: every action routes its error transition here; reports the message of the last failed node. -->
<kill name="Kill">
    <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
</kill>
<!-- Step1: runs scripts/step1.sql over Hive2 with the stats and graph database names; continues to Step2. -->
<action name="Step1">
    <hive2 xmlns="uri:oozie:hive2-action:0.1">
        <jdbc-url>${hive_jdbc_url}</jdbc-url>
        <script>scripts/step1.sql</script>
        <param>stats_db_name=${stats_db_name}</param>
        <param>openaire_db_name=${openaire_db_name}</param>
    </hive2>
    <ok to="Step2"/>
    <error to="Kill"/>
</action>
<!-- Step2: runs scripts/step2.sql over Hive2 with the stats and graph database names; continues to Step3. -->
<action name="Step2">
    <hive2 xmlns="uri:oozie:hive2-action:0.1">
        <jdbc-url>${hive_jdbc_url}</jdbc-url>
        <script>scripts/step2.sql</script>
        <param>stats_db_name=${stats_db_name}</param>
        <param>openaire_db_name=${openaire_db_name}</param>
    </hive2>
    <ok to="Step3"/>
    <error to="Kill"/>
</action>
<!-- Step3: runs scripts/step3.sql over Hive2 with the stats and graph database names; continues to Step4. -->
<action name="Step3">
    <hive2 xmlns="uri:oozie:hive2-action:0.1">
        <jdbc-url>${hive_jdbc_url}</jdbc-url>
        <script>scripts/step3.sql</script>
        <param>stats_db_name=${stats_db_name}</param>
        <param>openaire_db_name=${openaire_db_name}</param>
    </hive2>
    <ok to="Step4"/>
    <error to="Kill"/>
</action>
<!-- Step4: runs scripts/step4.sql over Hive2 with the stats and graph database names; continues to Step5. -->
<action name="Step4">
    <hive2 xmlns="uri:oozie:hive2-action:0.1">
        <jdbc-url>${hive_jdbc_url}</jdbc-url>
        <script>scripts/step4.sql</script>
        <param>stats_db_name=${stats_db_name}</param>
        <param>openaire_db_name=${openaire_db_name}</param>
    </hive2>
    <ok to="Step5"/>
    <error to="Kill"/>
</action>
<!-- Step5: runs scripts/step5.sql over Hive2 with the stats and graph database names; continues to Step6. -->
<action name="Step5">
    <hive2 xmlns="uri:oozie:hive2-action:0.1">
        <jdbc-url>${hive_jdbc_url}</jdbc-url>
        <script>scripts/step5.sql</script>
        <param>stats_db_name=${stats_db_name}</param>
        <param>openaire_db_name=${openaire_db_name}</param>
    </hive2>
    <ok to="Step6"/>
    <error to="Kill"/>
</action>
<!-- Step6: runs scripts/step6.sql over Hive2 with the stats and graph database names; continues to Step7. -->
<action name="Step6">
    <hive2 xmlns="uri:oozie:hive2-action:0.1">
        <jdbc-url>${hive_jdbc_url}</jdbc-url>
        <script>scripts/step6.sql</script>
        <param>stats_db_name=${stats_db_name}</param>
        <param>openaire_db_name=${openaire_db_name}</param>
    </hive2>
    <ok to="Step7"/>
    <error to="Kill"/>
</action>
<!-- Step7: runs scripts/step7.sql over Hive2 with the stats and graph database names; continues to Step8. -->
<action name="Step7">
    <hive2 xmlns="uri:oozie:hive2-action:0.1">
        <jdbc-url>${hive_jdbc_url}</jdbc-url>
        <script>scripts/step7.sql</script>
        <param>stats_db_name=${stats_db_name}</param>
        <param>openaire_db_name=${openaire_db_name}</param>
    </hive2>
    <ok to="Step8"/>
    <error to="Kill"/>
</action>
<!-- Step8: runs scripts/step8.sql over Hive2 with the stats and graph database names; continues to Step9. -->
<action name="Step8">
    <hive2 xmlns="uri:oozie:hive2-action:0.1">
        <jdbc-url>${hive_jdbc_url}</jdbc-url>
        <script>scripts/step8.sql</script>
        <param>stats_db_name=${stats_db_name}</param>
        <param>openaire_db_name=${openaire_db_name}</param>
    </hive2>
    <ok to="Step9"/>
    <error to="Kill"/>
</action>
<!-- Step9: runs scripts/step9.sql over Hive2 with the stats and graph database names; continues to Step10. -->
<action name="Step9">
    <hive2 xmlns="uri:oozie:hive2-action:0.1">
        <jdbc-url>${hive_jdbc_url}</jdbc-url>
        <script>scripts/step9.sql</script>
        <param>stats_db_name=${stats_db_name}</param>
        <param>openaire_db_name=${openaire_db_name}</param>
    </hive2>
    <ok to="Step10"/>
    <error to="Kill"/>
</action>
<!-- Step10: runs scripts/step10.sql over Hive2; besides the stats and graph database names it also receives external_stats_db_name. Continues to Step11. -->
<action name="Step10">
    <hive2 xmlns="uri:oozie:hive2-action:0.1">
        <jdbc-url>${hive_jdbc_url}</jdbc-url>
        <script>scripts/step10.sql</script>
        <param>stats_db_name=${stats_db_name}</param>
        <param>openaire_db_name=${openaire_db_name}</param>
        <param>external_stats_db_name=${external_stats_db_name}</param>
    </hive2>
    <ok to="Step11"/>
    <error to="Kill"/>
</action>
<!-- Step11: runs scripts/step11.sql over Hive2; besides the stats and graph database names it also receives external_stats_db_name. Continues to Step12. -->
<action name="Step11">
    <hive2 xmlns="uri:oozie:hive2-action:0.1">
        <jdbc-url>${hive_jdbc_url}</jdbc-url>
        <script>scripts/step11.sql</script>
        <param>stats_db_name=${stats_db_name}</param>
        <param>openaire_db_name=${openaire_db_name}</param>
        <param>external_stats_db_name=${external_stats_db_name}</param>
    </hive2>
    <ok to="Step12"/>
    <error to="Kill"/>
</action>
<!-- Step12: runs scripts/step12.sql over Hive2 with the stats and graph database names; continues to Step13. -->
<action name="Step12">
    <hive2 xmlns="uri:oozie:hive2-action:0.1">
        <jdbc-url>${hive_jdbc_url}</jdbc-url>
        <script>scripts/step12.sql</script>
        <param>stats_db_name=${stats_db_name}</param>
        <param>openaire_db_name=${openaire_db_name}</param>
    </hive2>
    <ok to="Step13"/>
    <error to="Kill"/>
</action>
<!-- Step13: runs scripts/step13.sql over Hive2 with the stats and graph database names; continues to Step14. -->
<action name="Step13">
    <hive2 xmlns="uri:oozie:hive2-action:0.1">
        <jdbc-url>${hive_jdbc_url}</jdbc-url>
        <script>scripts/step13.sql</script>
        <param>stats_db_name=${stats_db_name}</param>
        <param>openaire_db_name=${openaire_db_name}</param>
    </hive2>
    <ok to="Step14"/>
    <error to="Kill"/>
</action>
<!-- Step14: runs scripts/step14.sql over Hive2 with the stats and graph database names; continues to Step15. -->
<action name="Step14">
    <hive2 xmlns="uri:oozie:hive2-action:0.1">
        <jdbc-url>${hive_jdbc_url}</jdbc-url>
        <script>scripts/step14.sql</script>
        <param>stats_db_name=${stats_db_name}</param>
        <param>openaire_db_name=${openaire_db_name}</param>
    </hive2>
    <ok to="Step15"/>
    <error to="Kill"/>
</action>
<!-- Step15: runs scripts/step15.sql over Hive2 with the stats and graph database names; continues to Step15_5. -->
<action name="Step15">
    <hive2 xmlns="uri:oozie:hive2-action:0.1">
        <jdbc-url>${hive_jdbc_url}</jdbc-url>
        <script>scripts/step15.sql</script>
        <param>stats_db_name=${stats_db_name}</param>
        <param>openaire_db_name=${openaire_db_name}</param>
    </hive2>
    <ok to="Step15_5"/>
    <error to="Kill"/>
</action>
<!-- Step15_5: runs scripts/step15_5.sql over Hive2 with the stats and graph database names; continues to Contexts. -->
<action name="Step15_5">
    <hive2 xmlns="uri:oozie:hive2-action:0.1">
        <jdbc-url>${hive_jdbc_url}</jdbc-url>
        <script>scripts/step15_5.sql</script>
        <param>stats_db_name=${stats_db_name}</param>
        <param>openaire_db_name=${openaire_db_name}</param>
    </hive2>
    <ok to="Contexts"/>
    <error to="Kill"/>
</action>
<!--
    Contexts: shell action that ships contexts.sh and executes it with two
    positional arguments: the context API base url and the stats database name.
    Continues to Step16-createIndicatorsTables.
-->
<action name="Contexts">
    <shell xmlns="uri:oozie:shell-action:0.1">
        <job-tracker>${jobTracker}</job-tracker>
        <name-node>${nameNode}</name-node>
        <exec>contexts.sh</exec>
        <argument>${context_api_url}</argument>
        <argument>${stats_db_name}</argument>
        <file>contexts.sh</file>
    </shell>
    <ok to="Step16-createIndicatorsTables"/>
    <error to="Kill"/>
</action>
<!--
    Step16-createIndicatorsTables: shell action that ships indicators.sh and
    executes it with the stats database name and the path of
    step16-createIndicatorsTables.sql under the workflow application path.
    Continues to Step16_1-definitions.
-->
<action name="Step16-createIndicatorsTables">
    <shell xmlns="uri:oozie:shell-action:0.1">
        <job-tracker>${jobTracker}</job-tracker>
        <name-node>${nameNode}</name-node>
        <exec>indicators.sh</exec>
        <argument>${stats_db_name}</argument>
        <argument>${wf:appPath()}/scripts/step16-createIndicatorsTables.sql</argument>
        <file>indicators.sh</file>
    </shell>
    <ok to="Step16_1-definitions"/>
    <error to="Kill"/>
</action>
<!-- Step16_1-definitions: runs scripts/step16_1-definitions.sql over Hive2 with the stats and graph database names; continues to Step16_5. -->
<action name="Step16_1-definitions">
    <hive2 xmlns="uri:oozie:hive2-action:0.1">
        <jdbc-url>${hive_jdbc_url}</jdbc-url>
        <script>scripts/step16_1-definitions.sql</script>
        <param>stats_db_name=${stats_db_name}</param>
        <param>openaire_db_name=${openaire_db_name}</param>
    </hive2>
    <ok to="Step16_5"/>
    <error to="Kill"/>
</action>
<!-- Step16_5: runs scripts/step16_5.sql over Hive2 with the stats and graph database names; continues to Step19-finalize. -->
<action name="Step16_5">
    <hive2 xmlns="uri:oozie:hive2-action:0.1">
        <jdbc-url>${hive_jdbc_url}</jdbc-url>
        <script>scripts/step16_5.sql</script>
        <param>stats_db_name=${stats_db_name}</param>
        <param>openaire_db_name=${openaire_db_name}</param>
    </hive2>
    <ok to="Step19-finalize"/>
    <error to="Kill"/>
</action>
<!--
    Step19-finalize: shell action that ships finalizedb.sh and executes it
    with the stats database name and the shadow schema name.
    Continues to step20-createMonitorDB.
-->
<action name="Step19-finalize">
    <shell xmlns="uri:oozie:shell-action:0.1">
        <job-tracker>${jobTracker}</job-tracker>
        <name-node>${nameNode}</name-node>
        <exec>finalizedb.sh</exec>
        <argument>${stats_db_name}</argument>
        <argument>${stats_db_shadow_name}</argument>
        <file>finalizedb.sh</file>
    </shell>
    <ok to="step20-createMonitorDB"/>
    <error to="Kill"/>
</action>
<!--
    step20-createMonitorDB: shell action that ships monitor.sh and executes it
    with, in order: the stats database name, the monitor database name, the
    shadow monitor database name and the path of step20-createMonitorDB.sql
    under the workflow application path.
    Continues to step21-createObservatoryDB-pre.
-->
<action name="step20-createMonitorDB">
    <shell xmlns="uri:oozie:shell-action:0.1">
        <job-tracker>${jobTracker}</job-tracker>
        <name-node>${nameNode}</name-node>
        <exec>monitor.sh</exec>
        <argument>${stats_db_name}</argument>
        <argument>${monitor_db_name}</argument>
        <argument>${monitor_db_shadow_name}</argument>
        <argument>${wf:appPath()}/scripts/step20-createMonitorDB.sql</argument>
        <file>monitor.sh</file>
    </shell>
    <ok to="step21-createObservatoryDB-pre"/>
    <error to="Kill"/>
</action>
<!--
    step21-createObservatoryDB-pre: shell action that ships observatory-pre.sh
    and executes it with the stats database name, the observatory database
    name and the shadow observatory database name.
    Continues to step21-createObservatoryDB.
-->
<action name="step21-createObservatoryDB-pre">
    <shell xmlns="uri:oozie:shell-action:0.1">
        <job-tracker>${jobTracker}</job-tracker>
        <name-node>${nameNode}</name-node>
        <exec>observatory-pre.sh</exec>
        <argument>${stats_db_name}</argument>
        <argument>${observatory_db_name}</argument>
        <argument>${observatory_db_shadow_name}</argument>
        <file>observatory-pre.sh</file>
    </shell>
    <ok to="step21-createObservatoryDB"/>
    <error to="Kill"/>
</action>
<!-- step21-createObservatoryDB: runs scripts/step21-createObservatoryDB.sql over Hive2 with the stats and observatory database names; continues to step21-createObservatoryDB-post. -->
<action name="step21-createObservatoryDB">
    <hive2 xmlns="uri:oozie:hive2-action:0.1">
        <jdbc-url>${hive_jdbc_url}</jdbc-url>
        <script>scripts/step21-createObservatoryDB.sql</script>
        <param>stats_db_name=${stats_db_name}</param>
        <param>observatory_db_name=${observatory_db_name}</param>
    </hive2>
    <ok to="step21-createObservatoryDB-post"/>
    <error to="Kill"/>
</action>
<!--
    step21-createObservatoryDB-post: shell action that ships
    observatory-post.sh and executes it with the stats database name, the
    observatory database name and the shadow observatory database name.
    Continues to Step22.
-->
<action name="step21-createObservatoryDB-post">
    <shell xmlns="uri:oozie:shell-action:0.1">
        <job-tracker>${jobTracker}</job-tracker>
        <name-node>${nameNode}</name-node>
        <exec>observatory-post.sh</exec>
        <argument>${stats_db_name}</argument>
        <argument>${observatory_db_name}</argument>
        <argument>${observatory_db_shadow_name}</argument>
        <file>observatory-post.sh</file>
    </shell>
    <ok to="Step22"/>
    <error to="Kill"/>
</action>
<!--
    Step22: shell action that ships updateCache.sh and executes it with the
    stats tool API url (described in the parameters as the trigger for the
    cache update). Last action of the workflow; continues to End.
-->
<action name="Step22">
    <shell xmlns="uri:oozie:shell-action:0.1">
        <job-tracker>${jobTracker}</job-tracker>
        <name-node>${nameNode}</name-node>
        <exec>updateCache.sh</exec>
        <argument>${stats_tool_api_url}</argument>
        <file>updateCache.sh</file>
    </shell>
    <ok to="End"/>
    <error to="Kill"/>
</action>
<!-- Successful termination node, reached from Step22. -->
<end name="End"/>
</workflow-app>