<workflow-app name="Graph Stats Hive" xmlns="uri:oozie:workflow:0.5">
<parameters>
    <!-- Formal parameters of the workflow. Entries without a <value> have no default here. -->

    <!-- Database names used by the SQL and shell steps -->
    <property>
        <name>stats_db_name</name>
        <description>the target stats database name</description>
    </property>
    <property>
        <name>openaire_db_name</name>
        <description>the original graph database name</description>
    </property>
    <property>
        <name>external_stats_db_name</name>
        <description>the external stats that should be added since they are not included in the graph database</description>
    </property>
    <property>
        <name>usage_stats_db_name</name>
        <description>the usage statistics database name</description>
    </property>
    <property>
        <name>stats_db_shadow_name</name>
        <description>the name of the shadow schema</description>
    </property>
    <property>
        <name>monitor_db_name</name>
        <description>the target monitor db name</description>
    </property>
    <property>
        <name>monitor_db_shadow_name</name>
        <description>the name of the shadow monitor db</description>
    </property>
    <property>
        <name>observatory_db_name</name>
        <description>the target observatory db name</description>
    </property>
    <property>
        <name>observatory_db_shadow_name</name>
        <description>the name of the shadow observatory db</description>
    </property>
    <property>
        <name>usage_stats_db_shadow_name</name>
        <description>the name of the shadow usage stats db</description>
    </property>

    <!-- Service endpoints and Hive connection settings -->
    <property>
        <name>stats_tool_api_url</name>
        <description>The url of the API of the stats tool. Is used to trigger the cache update.</description>
    </property>
    <property>
        <name>hive_metastore_uris</name>
        <description>hive server metastore URIs</description>
    </property>
    <property>
        <name>hive_jdbc_url</name>
        <description>hive server jdbc url</description>
    </property>
    <property>
        <name>hive_timeout</name>
        <description>the time period, in seconds, after which Hive fails a transaction if a Hive client has not sent a heartbeat. The default value is 300 seconds.</description>
    </property>
    <property>
        <name>context_api_url</name>
        <description>the base url of the context api (https://services.openaire.eu/openaire)</description>
    </property>
    <property>
        <name>hadoop_user_name</name>
        <description>user name of the wf owner</description>
    </property>
    <property>
        <name>sparkSqlWarehouseDir</name>
        <description>value passed to the Spark action as spark.sql.warehouse.dir</description>
    </property>

    <!-- General oozie workflow properties -->
    <!-- NOTE(review): the default below hard-codes a history server host and an event log directory; confirm they are overridden per cluster -->
    <property>
        <name>sparkClusterOpts</name>
        <value>--conf spark.network.timeout=600 --conf spark.extraListeners= --conf spark.sql.queryExecutionListeners= --conf spark.yarn.historyServer.address=http://iis-cdh5-test-m3.ocean.icm.edu.pl:18088 --conf spark.eventLog.dir=hdfs://nameservice1/user/spark/applicationHistory</value>
        <description>spark cluster-wide options</description>
    </property>
    <property>
        <name>sparkResourceOpts</name>
        <value>--executor-memory=6G --conf spark.executor.memoryOverhead=4G --executor-cores=6 --driver-memory=8G --driver-cores=4</value>
        <description>spark resource options</description>
    </property>
    <property>
        <name>sparkApplicationOpts</name>
        <value>--conf spark.sql.shuffle.partitions=3840</value>
        <description>spark application options</description>
    </property>
</parameters>
<!-- Settings declared once for the whole workflow: cluster endpoints plus Hive, sharelib and queue configuration -->
<global>
    <job-tracker>${jobTracker}</job-tracker>
    <name-node>${nameNode}</name-node>
    <configuration>
        <property>
            <name>hive.metastore.uris</name>
            <value>${hive_metastore_uris}</value>
        </property>
        <property>
            <name>hive.txn.timeout</name>
            <value>${hive_timeout}</value>
        </property>
        <property>
            <name>hive.mapjoin.followby.gby.localtask.max.memory.usage</name>
            <value>0.80</value>
        </property>
        <property>
            <name>oozie.action.sharelib.for.spark</name>
            <value>${oozieActionShareLibForSpark2}</value>
        </property>
        <!-- NOTE(review): the queue name is fixed to "analytics" here rather than taken from a parameter -->
        <property>
            <name>mapred.job.queue.name</name>
            <value>analytics</value>
        </property>
    </configuration>
</global>
<!-- Entry point: the workflow always starts at the resume_from decision -->
<start to="resume_from"/>

<!-- Transfers control to the node whose name equals the 'resumeFrom' workflow configuration value; -->
<!-- when no case matches, execution begins at Step1. -->
<decision name="resume_from">
    <switch>
        <case to="Step1">${wf:conf('resumeFrom') eq 'Step1'}</case>
        <case to="Step2">${wf:conf('resumeFrom') eq 'Step2'}</case>
        <case to="Step3">${wf:conf('resumeFrom') eq 'Step3'}</case>
        <case to="Step4">${wf:conf('resumeFrom') eq 'Step4'}</case>
        <case to="Step5">${wf:conf('resumeFrom') eq 'Step5'}</case>
        <case to="Step6">${wf:conf('resumeFrom') eq 'Step6'}</case>
        <case to="Step7">${wf:conf('resumeFrom') eq 'Step7'}</case>
        <case to="Step8">${wf:conf('resumeFrom') eq 'Step8'}</case>
        <case to="Step9">${wf:conf('resumeFrom') eq 'Step9'}</case>
        <case to="Step10">${wf:conf('resumeFrom') eq 'Step10'}</case>
        <case to="Step11">${wf:conf('resumeFrom') eq 'Step11'}</case>
        <case to="Step12">${wf:conf('resumeFrom') eq 'Step12'}</case>
        <case to="Step13">${wf:conf('resumeFrom') eq 'Step13'}</case>
        <case to="Step14">${wf:conf('resumeFrom') eq 'Step14'}</case>
        <case to="Step15">${wf:conf('resumeFrom') eq 'Step15'}</case>
        <case to="Step15_5">${wf:conf('resumeFrom') eq 'Step15_5'}</case>
        <case to="Contexts">${wf:conf('resumeFrom') eq 'Contexts'}</case>
        <case to="Step16-createIndicatorsTables">${wf:conf('resumeFrom') eq 'Step16-createIndicatorsTables'}</case>
        <case to="Step16_1-definitions">${wf:conf('resumeFrom') eq 'Step16_1-definitions'}</case>
        <case to="Step16_5">${wf:conf('resumeFrom') eq 'Step16_5'}</case>
        <case to="Step19-finalize">${wf:conf('resumeFrom') eq 'Step19-finalize'}</case>
        <case to="step20-createMonitorDB">${wf:conf('resumeFrom') eq 'step20-createMonitorDB'}</case>
        <case to="step21-createObservatoryDB-pre">${wf:conf('resumeFrom') eq 'step21-createObservatoryDB-pre'}</case>
        <case to="step21-createObservatoryDB">${wf:conf('resumeFrom') eq 'step21-createObservatoryDB'}</case>
        <case to="step21-createObservatoryDB-post">${wf:conf('resumeFrom') eq 'step21-createObservatoryDB-post'}</case>
        <case to="step22-copyDataToImpalaCluster">${wf:conf('resumeFrom') eq 'step22-copyDataToImpalaCluster'}</case>
        <case to="step22a-createPDFsAggregated">${wf:conf('resumeFrom') eq 'step22a-createPDFsAggregated'}</case>
        <case to="step23-finalizeImpalaCluster">${wf:conf('resumeFrom') eq 'step23-finalizeImpalaCluster'}</case>
        <case to="Step24-updateCache">${wf:conf('resumeFrom') eq 'Step24-updateCache'}</case>
        <default to="Step1"/>
    </switch>
</decision>
<!-- Failure terminal: every action's error transition leads here; the message reports the error of the last failed node -->
<kill name="Kill">
    <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
</kill>
<!-- Step1: runs scripts/step1.sql over the Hive JDBC url with the stats and graph database names; continues with Step2 -->
<action name="Step1">
    <hive2 xmlns="uri:oozie:hive2-action:0.1">
        <jdbc-url>${hive_jdbc_url}</jdbc-url>
        <script>scripts/step1.sql</script>
        <param>stats_db_name=${stats_db_name}</param>
        <param>openaire_db_name=${openaire_db_name}</param>
    </hive2>
    <ok to="Step2"/>
    <error to="Kill"/>
</action>
<!-- Step2: runs scripts/step2.sql over the Hive JDBC url with the stats and graph database names; continues with Step3 -->
<action name="Step2">
    <hive2 xmlns="uri:oozie:hive2-action:0.1">
        <jdbc-url>${hive_jdbc_url}</jdbc-url>
        <script>scripts/step2.sql</script>
        <param>stats_db_name=${stats_db_name}</param>
        <param>openaire_db_name=${openaire_db_name}</param>
    </hive2>
    <ok to="Step3"/>
    <error to="Kill"/>
</action>
<!-- Step3: runs scripts/step3.sql over the Hive JDBC url with the stats and graph database names; continues with Step4 -->
<action name="Step3">
    <hive2 xmlns="uri:oozie:hive2-action:0.1">
        <jdbc-url>${hive_jdbc_url}</jdbc-url>
        <script>scripts/step3.sql</script>
        <param>stats_db_name=${stats_db_name}</param>
        <param>openaire_db_name=${openaire_db_name}</param>
    </hive2>
    <ok to="Step4"/>
    <error to="Kill"/>
</action>
<!-- Step4: runs scripts/step4.sql over the Hive JDBC url with the stats and graph database names; continues with Step5 -->
<action name="Step4">
    <hive2 xmlns="uri:oozie:hive2-action:0.1">
        <jdbc-url>${hive_jdbc_url}</jdbc-url>
        <script>scripts/step4.sql</script>
        <param>stats_db_name=${stats_db_name}</param>
        <param>openaire_db_name=${openaire_db_name}</param>
    </hive2>
    <ok to="Step5"/>
    <error to="Kill"/>
</action>
<!-- Step5: runs scripts/step5.sql over the Hive JDBC url with the stats and graph database names; continues with Step6 -->
<action name="Step5">
    <hive2 xmlns="uri:oozie:hive2-action:0.1">
        <jdbc-url>${hive_jdbc_url}</jdbc-url>
        <script>scripts/step5.sql</script>
        <param>stats_db_name=${stats_db_name}</param>
        <param>openaire_db_name=${openaire_db_name}</param>
    </hive2>
    <ok to="Step6"/>
    <error to="Kill"/>
</action>
<!-- Step6: runs scripts/step6.sql over the Hive JDBC url with the stats and graph database names; continues with Step7 -->
<action name="Step6">
    <hive2 xmlns="uri:oozie:hive2-action:0.1">
        <jdbc-url>${hive_jdbc_url}</jdbc-url>
        <script>scripts/step6.sql</script>
        <param>stats_db_name=${stats_db_name}</param>
        <param>openaire_db_name=${openaire_db_name}</param>
    </hive2>
    <ok to="Step7"/>
    <error to="Kill"/>
</action>
<!-- Step7: runs scripts/step7.sql over the Hive JDBC url with the stats and graph database names; continues with Step8 -->
<action name="Step7">
    <hive2 xmlns="uri:oozie:hive2-action:0.1">
        <jdbc-url>${hive_jdbc_url}</jdbc-url>
        <script>scripts/step7.sql</script>
        <param>stats_db_name=${stats_db_name}</param>
        <param>openaire_db_name=${openaire_db_name}</param>
    </hive2>
    <ok to="Step8"/>
    <error to="Kill"/>
</action>
<!-- Step8: runs scripts/step8.sql over the Hive JDBC url with the stats and graph database names; continues with Step9 -->
<action name="Step8">
    <hive2 xmlns="uri:oozie:hive2-action:0.1">
        <jdbc-url>${hive_jdbc_url}</jdbc-url>
        <script>scripts/step8.sql</script>
        <param>stats_db_name=${stats_db_name}</param>
        <param>openaire_db_name=${openaire_db_name}</param>
    </hive2>
    <ok to="Step9"/>
    <error to="Kill"/>
</action>
<!-- Step9: runs scripts/step9.sql over the Hive JDBC url with the stats and graph database names; continues with Step10 -->
<action name="Step9">
    <hive2 xmlns="uri:oozie:hive2-action:0.1">
        <jdbc-url>${hive_jdbc_url}</jdbc-url>
        <script>scripts/step9.sql</script>
        <param>stats_db_name=${stats_db_name}</param>
        <param>openaire_db_name=${openaire_db_name}</param>
    </hive2>
    <ok to="Step10"/>
    <error to="Kill"/>
</action>
<!-- Step10: runs scripts/step10.sql; besides the stats and graph databases it also receives the external stats database name; continues with Step11 -->
<action name="Step10">
    <hive2 xmlns="uri:oozie:hive2-action:0.1">
        <jdbc-url>${hive_jdbc_url}</jdbc-url>
        <script>scripts/step10.sql</script>
        <param>stats_db_name=${stats_db_name}</param>
        <param>openaire_db_name=${openaire_db_name}</param>
        <param>external_stats_db_name=${external_stats_db_name}</param>
    </hive2>
    <ok to="Step11"/>
    <error to="Kill"/>
</action>
<!-- Step11: runs scripts/step11.sql; besides the stats and graph databases it also receives the external stats database name; continues with Step12 -->
<action name="Step11">
    <hive2 xmlns="uri:oozie:hive2-action:0.1">
        <jdbc-url>${hive_jdbc_url}</jdbc-url>
        <script>scripts/step11.sql</script>
        <param>stats_db_name=${stats_db_name}</param>
        <param>openaire_db_name=${openaire_db_name}</param>
        <param>external_stats_db_name=${external_stats_db_name}</param>
    </hive2>
    <ok to="Step12"/>
    <error to="Kill"/>
</action>
<!-- Step12: runs scripts/step12.sql over the Hive JDBC url with the stats and graph database names; continues with Step13 -->
<action name="Step12">
    <hive2 xmlns="uri:oozie:hive2-action:0.1">
        <jdbc-url>${hive_jdbc_url}</jdbc-url>
        <script>scripts/step12.sql</script>
        <param>stats_db_name=${stats_db_name}</param>
        <param>openaire_db_name=${openaire_db_name}</param>
    </hive2>
    <ok to="Step13"/>
    <error to="Kill"/>
</action>
<!-- Step13: runs scripts/step13.sql over the Hive JDBC url with the stats and graph database names; continues with Step14 -->
<action name="Step13">
    <hive2 xmlns="uri:oozie:hive2-action:0.1">
        <jdbc-url>${hive_jdbc_url}</jdbc-url>
        <script>scripts/step13.sql</script>
        <param>stats_db_name=${stats_db_name}</param>
        <param>openaire_db_name=${openaire_db_name}</param>
    </hive2>
    <ok to="Step14"/>
    <error to="Kill"/>
</action>
<!-- Step14: runs scripts/step14.sql over the Hive JDBC url with the stats and graph database names; continues with Step15 -->
<action name="Step14">
    <hive2 xmlns="uri:oozie:hive2-action:0.1">
        <jdbc-url>${hive_jdbc_url}</jdbc-url>
        <script>scripts/step14.sql</script>
        <param>stats_db_name=${stats_db_name}</param>
        <param>openaire_db_name=${openaire_db_name}</param>
    </hive2>
    <ok to="Step15"/>
    <error to="Kill"/>
</action>
<!-- Step15: runs scripts/step15.sql over the Hive JDBC url with the stats and graph database names; continues with Step15_5 -->
<action name="Step15">
    <hive2 xmlns="uri:oozie:hive2-action:0.1">
        <jdbc-url>${hive_jdbc_url}</jdbc-url>
        <script>scripts/step15.sql</script>
        <param>stats_db_name=${stats_db_name}</param>
        <param>openaire_db_name=${openaire_db_name}</param>
    </hive2>
    <ok to="Step15_5"/>
    <error to="Kill"/>
</action>
<!-- Step15_5: runs scripts/step15_5.sql with the stats, graph and external stats database names; continues with Contexts -->
<action name="Step15_5">
    <hive2 xmlns="uri:oozie:hive2-action:0.1">
        <jdbc-url>${hive_jdbc_url}</jdbc-url>
        <script>scripts/step15_5.sql</script>
        <param>stats_db_name=${stats_db_name}</param>
        <param>openaire_db_name=${openaire_db_name}</param>
        <param>external_stats_db_name=${external_stats_db_name}</param>
    </hive2>
    <ok to="Contexts"/>
    <error to="Kill"/>
</action>
<!-- Contexts: executes contexts.sh with the context API url and the stats database name as arguments; continues with Step16-createIndicatorsTables -->
<action name="Contexts">
    <shell xmlns="uri:oozie:shell-action:0.1">
        <job-tracker>${jobTracker}</job-tracker>
        <name-node>${nameNode}</name-node>
        <exec>contexts.sh</exec>
        <argument>${context_api_url}</argument>
        <argument>${stats_db_name}</argument>
        <file>contexts.sh</file>
    </shell>
    <ok to="Step16-createIndicatorsTables"/>
    <error to="Kill"/>
</action>
<!-- Disabled hive2 variant of Step16-createIndicatorsTables, kept for reference: -->
<!-- <action name="Step16-createIndicatorsTables"> -->
<!--     <hive2 xmlns="uri:oozie:hive2-action:0.1"> -->
<!--         <jdbc-url>${hive_jdbc_url}</jdbc-url> -->
<!--         <script>scripts/step16-createIndicatorsTables.sql</script> -->
<!--         <param>stats_db_name=${stats_db_name}</param> -->
<!--         <param>external_stats_db_name=${external_stats_db_name}</param> -->
<!--     </hive2> -->
<!--     <ok to="Step16_1-definitions"/> -->
<!--     <error to="Kill"/> -->
<!-- </action> -->
<!-- Step16-createIndicatorsTables: submits eu.dnetlib.dhp.oozie.RunSQLSparkJob to YARN in cluster mode. -->
<!-- The job receives the metastore URIs, the path of step16-createIndicatorsTables.sql and the two database names; -->
<!-- Spark options are assembled from the warehouse dir and the three spark*Opts parameters. Continues with Step16_1-definitions. -->
<action name="Step16-createIndicatorsTables">
    <spark xmlns="uri:oozie:spark-action:0.2">
        <master>yarn</master>
        <mode>cluster</mode>
        <name>Step16-createIndicatorsTables</name>
        <class>eu.dnetlib.dhp.oozie.RunSQLSparkJob</class>
        <jar>dhp-stats-update-${projectVersion}.jar</jar>
        <spark-opts>
            --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
            ${sparkClusterOpts}
            ${sparkResourceOpts}
            ${sparkApplicationOpts}
        </spark-opts>
        <arg>--hiveMetastoreUris</arg><arg>${hive_metastore_uris}</arg>
        <arg>--sql</arg><arg>eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql</arg>
        <arg>--stats_db_name</arg><arg>${stats_db_name}</arg>
        <arg>--external_stats_db_name</arg><arg>${external_stats_db_name}</arg>
    </spark>
    <ok to="Step16_1-definitions"/>
    <error to="Kill"/>
</action>
<!-- Step16_1-definitions: runs scripts/step16_1-definitions.sql with the stats and graph database names; continues with Step16_5 -->
<action name="Step16_1-definitions">
    <hive2 xmlns="uri:oozie:hive2-action:0.1">
        <jdbc-url>${hive_jdbc_url}</jdbc-url>
        <script>scripts/step16_1-definitions.sql</script>
        <param>stats_db_name=${stats_db_name}</param>
        <param>openaire_db_name=${openaire_db_name}</param>
    </hive2>
    <ok to="Step16_5"/>
    <error to="Kill"/>
</action>
<!-- Step16_5: runs scripts/step16_5.sql with the stats and graph database names; continues with Step19-finalize -->
<action name="Step16_5">
    <hive2 xmlns="uri:oozie:hive2-action:0.1">
        <jdbc-url>${hive_jdbc_url}</jdbc-url>
        <script>scripts/step16_5.sql</script>
        <param>stats_db_name=${stats_db_name}</param>
        <param>openaire_db_name=${openaire_db_name}</param>
    </hive2>
    <ok to="Step19-finalize"/>
    <error to="Kill"/>
</action>
<!-- Step19-finalize: executes finalizedb.sh with the stats database name and its shadow name; continues with step20-createMonitorDB -->
<action name="Step19-finalize">
    <shell xmlns="uri:oozie:shell-action:0.1">
        <job-tracker>${jobTracker}</job-tracker>
        <name-node>${nameNode}</name-node>
        <exec>finalizedb.sh</exec>
        <argument>${stats_db_name}</argument>
        <argument>${stats_db_shadow_name}</argument>
        <file>finalizedb.sh</file>
    </shell>
    <ok to="step20-createMonitorDB"/>
    <error to="Kill"/>
</action>
<!-- step20-createMonitorDB: executes monitor.sh. Positional arguments are the stats db, the monitor db and its shadow name, -->
<!-- followed by the application paths of six step20 SQL scripts. Continues with step21-createObservatoryDB-pre. -->
<action name="step20-createMonitorDB">
    <shell xmlns="uri:oozie:shell-action:0.1">
        <job-tracker>${jobTracker}</job-tracker>
        <name-node>${nameNode}</name-node>
        <exec>monitor.sh</exec>
        <argument>${stats_db_name}</argument>
        <argument>${monitor_db_name}</argument>
        <argument>${monitor_db_shadow_name}</argument>
        <argument>${wf:appPath()}/scripts/step20-createMonitorDB.sql</argument>
        <argument>${wf:appPath()}/scripts/step20-createMonitorDB_funded.sql</argument>
        <argument>${wf:appPath()}/scripts/step20-createMonitorDB_institutions.sql</argument>
        <argument>${wf:appPath()}/scripts/step20-createMonitorDB_RIs.sql</argument>
        <argument>${wf:appPath()}/scripts/step20-createMonitorDB_RIs_tail.sql</argument>
        <argument>${wf:appPath()}/scripts/step20-createMonitorDBAll.sql</argument>
        <file>monitor.sh</file>
    </shell>
    <ok to="step21-createObservatoryDB-pre"/>
    <error to="Kill"/>
</action>
<!-- Disabled step20-createMonitorDB-post action, kept for reference: -->
<!-- <action name="step20-createMonitorDB-post"> -->
<!--     <shell xmlns="uri:oozie:shell-action:0.1"> -->
<!--         <job-tracker>${jobTracker}</job-tracker> -->
<!--         <name-node>${nameNode}</name-node> -->
<!--         <exec>monitor-post.sh</exec> -->
<!--         <argument>${monitor_db_name}</argument> -->
<!--         <argument>${monitor_db_shadow_name}</argument> -->
<!--         <file>monitor-post.sh</file> -->
<!--     </shell> -->
<!--     <ok to="step21-createObservatoryDB-pre"/> -->
<!--     <error to="Kill"/> -->
<!-- </action> -->
<!-- step21-createObservatoryDB-pre: executes observatory-pre.sh with the stats db, the observatory db and its shadow name; continues with step21-createObservatoryDB -->
<action name="step21-createObservatoryDB-pre">
    <shell xmlns="uri:oozie:shell-action:0.1">
        <job-tracker>${jobTracker}</job-tracker>
        <name-node>${nameNode}</name-node>
        <exec>observatory-pre.sh</exec>
        <argument>${stats_db_name}</argument>
        <argument>${observatory_db_name}</argument>
        <argument>${observatory_db_shadow_name}</argument>
        <file>observatory-pre.sh</file>
    </shell>
    <ok to="step21-createObservatoryDB"/>
    <error to="Kill"/>
</action>
<!-- step21-createObservatoryDB: runs scripts/step21-createObservatoryDB.sql with the stats and observatory database names; continues with step21-createObservatoryDB-post -->
<action name="step21-createObservatoryDB">
    <hive2 xmlns="uri:oozie:hive2-action:0.1">
        <jdbc-url>${hive_jdbc_url}</jdbc-url>
        <script>scripts/step21-createObservatoryDB.sql</script>
        <param>stats_db_name=${stats_db_name}</param>
        <param>observatory_db_name=${observatory_db_name}</param>
    </hive2>
    <ok to="step21-createObservatoryDB-post"/>
    <error to="Kill"/>
</action>
<!-- step21-createObservatoryDB-post: executes observatory-post.sh with the observatory db and its shadow name; continues with step22-copyDataToImpalaCluster -->
<action name="step21-createObservatoryDB-post">
    <shell xmlns="uri:oozie:shell-action:0.1">
        <job-tracker>${jobTracker}</job-tracker>
        <name-node>${nameNode}</name-node>
        <exec>observatory-post.sh</exec>
        <argument>${observatory_db_name}</argument>
        <argument>${observatory_db_shadow_name}</argument>
        <file>observatory-post.sh</file>
    </shell>
    <ok to="step22-copyDataToImpalaCluster"/>
    <error to="Kill"/>
</action>
<!-- step22-copyDataToImpalaCluster: executes copyDataToImpalaCluster.sh. Positional arguments are the stats, monitor, -->
<!-- observatory, external stats and usage stats database names, then the hadoop user name. Continues with step22a-createPDFsAggregated. -->
<action name="step22-copyDataToImpalaCluster">
    <shell xmlns="uri:oozie:shell-action:0.1">
        <job-tracker>${jobTracker}</job-tracker>
        <name-node>${nameNode}</name-node>
        <exec>copyDataToImpalaCluster.sh</exec>
        <!-- <env-var>HADOOP_USER_NAME=${wf:user()}</env-var> -->
        <!-- <argument>${external_stats_db_name}</argument> -->
        <argument>${stats_db_name}</argument>
        <argument>${monitor_db_name}</argument>
        <argument>${observatory_db_name}</argument>
        <argument>${external_stats_db_name}</argument>
        <argument>${usage_stats_db_name}</argument>
        <argument>${hadoop_user_name}</argument>
        <file>copyDataToImpalaCluster.sh</file>
    </shell>
    <ok to="step22a-createPDFsAggregated"/>
    <error to="Kill"/>
</action>
<!-- step22a-createPDFsAggregated: executes createPDFsAggregated.sh with the stats db, the monitor db and the hadoop user name; continues with step23-finalizeImpalaCluster -->
<action name="step22a-createPDFsAggregated">
    <shell xmlns="uri:oozie:shell-action:0.1">
        <job-tracker>${jobTracker}</job-tracker>
        <name-node>${nameNode}</name-node>
        <exec>createPDFsAggregated.sh</exec>
        <!-- <env-var>HADOOP_USER_NAME=${wf:user()}</env-var> -->
        <!-- <argument>${external_stats_db_name}</argument> -->
        <argument>${stats_db_name}</argument>
        <argument>${monitor_db_name}</argument>
        <argument>${hadoop_user_name}</argument>
        <file>createPDFsAggregated.sh</file>
    </shell>
    <ok to="step23-finalizeImpalaCluster"/>
    <error to="Kill"/>
</action>
<!-- step23-finalizeImpalaCluster: executes finalizeImpalaCluster.sh. Arguments come in (db, shadow db) pairs for the -->
<!-- stats, monitor, observatory and usage stats databases. Continues with Step24-updateCache. -->
<action name="step23-finalizeImpalaCluster">
    <shell xmlns="uri:oozie:shell-action:0.1">
        <job-tracker>${jobTracker}</job-tracker>
        <name-node>${nameNode}</name-node>
        <exec>finalizeImpalaCluster.sh</exec>
        <argument>${stats_db_name}</argument>
        <argument>${stats_db_shadow_name}</argument>
        <argument>${monitor_db_name}</argument>
        <argument>${monitor_db_shadow_name}</argument>
        <argument>${observatory_db_name}</argument>
        <argument>${observatory_db_shadow_name}</argument>
        <argument>${usage_stats_db_name}</argument>
        <argument>${usage_stats_db_shadow_name}</argument>
        <file>finalizeImpalaCluster.sh</file>
    </shell>
    <ok to="Step24-updateCache"/>
    <error to="Kill"/>
</action>
<!-- Step24-updateCache: last action; executes updateCache.sh with the stats tool API url, then transitions to End -->
<action name="Step24-updateCache">
    <shell xmlns="uri:oozie:shell-action:0.1">
        <job-tracker>${jobTracker}</job-tracker>
        <name-node>${nameNode}</name-node>
        <exec>updateCache.sh</exec>
        <argument>${stats_tool_api_url}</argument>
        <file>updateCache.sh</file>
    </shell>
    <ok to="End"/>
    <error to="Kill"/>
</action>
<!-- Successful terminal node, reached from Step24-updateCache -->
<end name="End"/>
</workflow-app>