<!-- 2020-06-11 20:01:14 +02:00 -->
<workflow-app name= "import_infospace_graph" xmlns= "uri:oozie:workflow:0.5" >
<!-- 2020-04-01 21:05:35 +02:00 -->
<!-- Parameters declared by this workflow. The workflow also references other
     variables (for example ${jobTracker}, ${nameNode}, ${hive_jdbc_url} and
     ${hive_metastore_uris}) that are not declared here and therefore have to
     be supplied by the job configuration. -->
<parameters>
    <property>
        <name>hive_db_name</name>
        <description>the target hive database name</description>
    </property>
    <property>
        <name>hive_source_db_name</name>
        <description>the source hive database name</description>
    </property>
    <!-- hive_timeout falls back to the 300 seconds its description documents,
         so a submission that omits it is still accepted. -->
    <property>
        <name>hive_timeout</name>
        <value>300</value>
        <description>the time period, in seconds, after which Hive fails a transaction if a Hive client has not sent a heartbeat. The default value is 300 seconds.</description>
    </property>
</parameters>
<!-- 2020-04-29 14:29:27 +02:00 -->
<!-- Workflow-wide settings: the cluster endpoints and two Hive client
     properties. hive.metastore.uris reads ${hive_metastore_uris}, the same
     variable every hive2 action in this workflow uses, so a single job
     property feeds both places. hive.txn.timeout takes its value from the
     hive_timeout workflow parameter. -->
<global>
    <job-tracker>${jobTracker}</job-tracker>
    <name-node>${nameNode}</name-node>
    <configuration>
        <property>
            <name>hive.metastore.uris</name>
            <value>${hive_metastore_uris}</value>
        </property>
        <property>
            <name>hive.txn.timeout</name>
            <value>${hive_timeout}</value>
        </property>
    </configuration>
</global>
<!-- 2020-06-11 20:01:14 +02:00 -->
<!-- Entry point: execution begins at the Step1-CreateDB action. -->
<start to="Step1-CreateDB"/>
<!-- 2020-04-01 21:05:35 +02:00 -->
<!-- Failure sink: every action's <error> transition points here. The message
     embeds the error text of the last node that failed. -->
<kill name="Kill">
    <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
</kill>
<!-- 2020-06-11 20:01:14 +02:00 -->
<!-- Step1-CreateDB: first action of the chain. Runs scripts/step1.sql with the
     Oozie hive2 action, passing the target and source database names to the
     script as parameters. On success the workflow continues with Step2; on
     error it goes to the Kill node.
     NOTE(review): the JDBC URL already selects ${hive_db_name}. If step1.sql
     is what creates that database (as the action name suggests), the
     connection may fail on a first run; confirm against the script. -->
<action name="Step1-CreateDB">
    <hive2 xmlns="uri:oozie:hive2-action:0.1">
        <job-tracker>${jobTracker}</job-tracker>
        <name-node>${nameNode}</name-node>
        <configuration>
            <property>
                <name>hive.metastore.uris</name>
                <value>${hive_metastore_uris}</value>
            </property>
        </configuration>
        <jdbc-url>${hive_jdbc_url}/${hive_db_name}</jdbc-url>
        <script>scripts/step1.sql</script>
        <param>hive_db_name=${hive_db_name}</param>
        <param>hive_source_db_name=${hive_source_db_name}</param>
    </hive2>
    <ok to="Step2"/>
    <error to="Kill"/>
</action>
<!-- 2020-06-11 20:01:14 +02:00 -->
<!-- Step2: runs scripts/step2.sql with the Oozie hive2 action over
     ${hive_jdbc_url}/${hive_db_name}, handing the target and source database
     names to the script as parameters. Success leads to Step3; any error
     routes to the Kill node. -->
<action name="Step2">
    <hive2 xmlns="uri:oozie:hive2-action:0.1">
        <job-tracker>${jobTracker}</job-tracker>
        <name-node>${nameNode}</name-node>
        <configuration>
            <property>
                <name>hive.metastore.uris</name>
                <value>${hive_metastore_uris}</value>
            </property>
        </configuration>
        <jdbc-url>${hive_jdbc_url}/${hive_db_name}</jdbc-url>
        <script>scripts/step2.sql</script>
        <param>hive_db_name=${hive_db_name}</param>
        <param>hive_source_db_name=${hive_source_db_name}</param>
    </hive2>
    <ok to="Step3"/>
    <error to="Kill"/>
</action>
<!-- Step3: runs scripts/step3.sql with the Oozie hive2 action over
     ${hive_jdbc_url}/${hive_db_name}, handing the target and source database
     names to the script as parameters. Success leads to Step4; any error
     routes to the Kill node. -->
<action name="Step3">
    <hive2 xmlns="uri:oozie:hive2-action:0.1">
        <job-tracker>${jobTracker}</job-tracker>
        <name-node>${nameNode}</name-node>
        <configuration>
            <property>
                <name>hive.metastore.uris</name>
                <value>${hive_metastore_uris}</value>
            </property>
        </configuration>
        <jdbc-url>${hive_jdbc_url}/${hive_db_name}</jdbc-url>
        <script>scripts/step3.sql</script>
        <param>hive_db_name=${hive_db_name}</param>
        <param>hive_source_db_name=${hive_source_db_name}</param>
    </hive2>
    <ok to="Step4"/>
    <!-- Alternate transition left disabled by the author; swapping it in for
         the line above would end the workflow right after this step. -->
    <!-- <ok to="End"/> -->
    <error to="Kill"/>
</action>
<!-- 2020-06-11 20:01:14 +02:00 -->
<!-- Step4: runs scripts/step4.sql with the Oozie hive2 action over
     ${hive_jdbc_url}/${hive_db_name}, handing the target and source database
     names to the script as parameters. Unlike the other hive2 steps in this
     workflow it also sets hive.variable.substitute=true explicitly. Success
     leads to Step5; any error routes to the Kill node. -->
<action name="Step4">
    <hive2 xmlns="uri:oozie:hive2-action:0.1">
        <job-tracker>${jobTracker}</job-tracker>
        <name-node>${nameNode}</name-node>
        <configuration>
            <property>
                <name>hive.metastore.uris</name>
                <value>${hive_metastore_uris}</value>
            </property>
            <property>
                <name>hive.variable.substitute</name>
                <value>true</value>
            </property>
        </configuration>
        <jdbc-url>${hive_jdbc_url}/${hive_db_name}</jdbc-url>
        <script>scripts/step4.sql</script>
        <param>hive_db_name=${hive_db_name}</param>
        <param>hive_source_db_name=${hive_source_db_name}</param>
    </hive2>
    <ok to="Step5"/>
    <error to="Kill"/>
</action>
<!-- Step5: runs scripts/step5.sql with the Oozie hive2 action over
     ${hive_jdbc_url}/${hive_db_name}, handing the target and source database
     names to the script as parameters. Success leads to Step6; any error
     routes to the Kill node. -->
<action name="Step5">
    <hive2 xmlns="uri:oozie:hive2-action:0.1">
        <job-tracker>${jobTracker}</job-tracker>
        <name-node>${nameNode}</name-node>
        <configuration>
            <property>
                <name>hive.metastore.uris</name>
                <value>${hive_metastore_uris}</value>
            </property>
        </configuration>
        <jdbc-url>${hive_jdbc_url}/${hive_db_name}</jdbc-url>
        <script>scripts/step5.sql</script>
        <param>hive_db_name=${hive_db_name}</param>
        <param>hive_source_db_name=${hive_source_db_name}</param>
    </hive2>
    <ok to="Step6"/>
    <error to="Kill"/>
</action>
<!-- Step6: runs scripts/step6.sql with the Oozie hive2 action over
     ${hive_jdbc_url}/${hive_db_name}, handing the target and source database
     names to the script as parameters. Success leads to Step7; any error
     routes to the Kill node. -->
<action name="Step6">
    <hive2 xmlns="uri:oozie:hive2-action:0.1">
        <job-tracker>${jobTracker}</job-tracker>
        <name-node>${nameNode}</name-node>
        <configuration>
            <property>
                <name>hive.metastore.uris</name>
                <value>${hive_metastore_uris}</value>
            </property>
        </configuration>
        <jdbc-url>${hive_jdbc_url}/${hive_db_name}</jdbc-url>
        <script>scripts/step6.sql</script>
        <param>hive_db_name=${hive_db_name}</param>
        <param>hive_source_db_name=${hive_source_db_name}</param>
    </hive2>
    <ok to="Step7"/>
    <error to="Kill"/>
</action>
<!-- Step7: runs scripts/step7.sql with the Oozie hive2 action over
     ${hive_jdbc_url}/${hive_db_name}, handing the target and source database
     names to the script as parameters. Success leads to Step8; any error
     routes to the Kill node. -->
<action name="Step7">
    <hive2 xmlns="uri:oozie:hive2-action:0.1">
        <job-tracker>${jobTracker}</job-tracker>
        <name-node>${nameNode}</name-node>
        <configuration>
            <property>
                <name>hive.metastore.uris</name>
                <value>${hive_metastore_uris}</value>
            </property>
        </configuration>
        <jdbc-url>${hive_jdbc_url}/${hive_db_name}</jdbc-url>
        <script>scripts/step7.sql</script>
        <param>hive_db_name=${hive_db_name}</param>
        <param>hive_source_db_name=${hive_source_db_name}</param>
    </hive2>
    <ok to="Step8"/>
    <error to="Kill"/>
</action>
<!-- Step8: runs scripts/step8.sql with the Oozie hive2 action over
     ${hive_jdbc_url}/${hive_db_name}, handing the target and source database
     names to the script as parameters. Success leads to Step9; any error
     routes to the Kill node. -->
<action name="Step8">
    <hive2 xmlns="uri:oozie:hive2-action:0.1">
        <job-tracker>${jobTracker}</job-tracker>
        <name-node>${nameNode}</name-node>
        <configuration>
            <property>
                <name>hive.metastore.uris</name>
                <value>${hive_metastore_uris}</value>
            </property>
        </configuration>
        <jdbc-url>${hive_jdbc_url}/${hive_db_name}</jdbc-url>
        <script>scripts/step8.sql</script>
        <param>hive_db_name=${hive_db_name}</param>
        <param>hive_source_db_name=${hive_source_db_name}</param>
    </hive2>
    <ok to="Step9"/>
    <error to="Kill"/>
</action>
<!-- Step9: runs scripts/step9.sql with the Oozie hive2 action over
     ${hive_jdbc_url}/${hive_db_name}, handing the target and source database
     names to the script as parameters. Success leads to Step10; any error
     routes to the Kill node. -->
<action name="Step9">
    <hive2 xmlns="uri:oozie:hive2-action:0.1">
        <job-tracker>${jobTracker}</job-tracker>
        <name-node>${nameNode}</name-node>
        <configuration>
            <property>
                <name>hive.metastore.uris</name>
                <value>${hive_metastore_uris}</value>
            </property>
        </configuration>
        <jdbc-url>${hive_jdbc_url}/${hive_db_name}</jdbc-url>
        <script>scripts/step9.sql</script>
        <param>hive_db_name=${hive_db_name}</param>
        <param>hive_source_db_name=${hive_source_db_name}</param>
    </hive2>
    <ok to="Step10"/>
    <error to="Kill"/>
</action>
<!-- Step10: runs scripts/step10.sql with the Oozie hive2 action over
     ${hive_jdbc_url}/${hive_db_name}, handing the target and source database
     names to the script as parameters. Success leads to Step11; any error
     routes to the Kill node. -->
<action name="Step10">
    <hive2 xmlns="uri:oozie:hive2-action:0.1">
        <job-tracker>${jobTracker}</job-tracker>
        <name-node>${nameNode}</name-node>
        <configuration>
            <property>
                <name>hive.metastore.uris</name>
                <value>${hive_metastore_uris}</value>
            </property>
        </configuration>
        <jdbc-url>${hive_jdbc_url}/${hive_db_name}</jdbc-url>
        <script>scripts/step10.sql</script>
        <param>hive_db_name=${hive_db_name}</param>
        <param>hive_source_db_name=${hive_source_db_name}</param>
    </hive2>
    <ok to="Step11"/>
    <error to="Kill"/>
</action>
<!-- Step11: runs scripts/step11.sql with the Oozie hive2 action over
     ${hive_jdbc_url}/${hive_db_name}, handing the target and source database
     names to the script as parameters. Success leads to Step12; any error
     routes to the Kill node. -->
<action name="Step11">
    <hive2 xmlns="uri:oozie:hive2-action:0.1">
        <job-tracker>${jobTracker}</job-tracker>
        <name-node>${nameNode}</name-node>
        <configuration>
            <property>
                <name>hive.metastore.uris</name>
                <value>${hive_metastore_uris}</value>
            </property>
        </configuration>
        <jdbc-url>${hive_jdbc_url}/${hive_db_name}</jdbc-url>
        <script>scripts/step11.sql</script>
        <param>hive_db_name=${hive_db_name}</param>
        <param>hive_source_db_name=${hive_source_db_name}</param>
    </hive2>
    <ok to="Step12"/>
    <error to="Kill"/>
</action>
<!-- 2020-06-11 20:01:14 +02:00 -->
<!-- Step12: runs scripts/step12.sql with the Oozie hive2 action over
     ${hive_jdbc_url}/${hive_db_name}, handing the target and source database
     names to the script as parameters. Success leads to Step13; any error
     routes to the Kill node. -->
<action name="Step12">
    <hive2 xmlns="uri:oozie:hive2-action:0.1">
        <job-tracker>${jobTracker}</job-tracker>
        <name-node>${nameNode}</name-node>
        <configuration>
            <property>
                <name>hive.metastore.uris</name>
                <value>${hive_metastore_uris}</value>
            </property>
        </configuration>
        <jdbc-url>${hive_jdbc_url}/${hive_db_name}</jdbc-url>
        <script>scripts/step12.sql</script>
        <param>hive_db_name=${hive_db_name}</param>
        <param>hive_source_db_name=${hive_source_db_name}</param>
    </hive2>
    <ok to="Step13"/>
    <error to="Kill"/>
</action>
<!-- Step13: last hive2 step. Runs scripts/step13.sql over
     ${hive_jdbc_url}/${hive_db_name}, handing the target and source database
     names to the script as parameters. Success leads to the shell action
     Step14; any error routes to the Kill node. -->
<action name="Step13">
    <hive2 xmlns="uri:oozie:hive2-action:0.1">
        <job-tracker>${jobTracker}</job-tracker>
        <name-node>${nameNode}</name-node>
        <configuration>
            <property>
                <name>hive.metastore.uris</name>
                <value>${hive_metastore_uris}</value>
            </property>
        </configuration>
        <jdbc-url>${hive_jdbc_url}/${hive_db_name}</jdbc-url>
        <script>scripts/step13.sql</script>
        <param>hive_db_name=${hive_db_name}</param>
        <param>hive_source_db_name=${hive_source_db_name}</param>
    </hive2>
    <ok to="Step14"/>
    <error to="Kill"/>
</action>
<!-- Step14: final action. Runs impala-shell.sh (shipped with the action through
     <file>) as an Oozie shell action with three arguments: the target database
     name, the SQL file name impala-stats.sql, and an absolute path to that SQL
     file. What the script does with them is not visible in this workflow.
     Success ends the workflow; any error routes to the Kill node. -->
<action name="Step14">
    <shell xmlns="uri:oozie:shell-action:0.1">
        <job-tracker>${jobTracker}</job-tracker>
        <name-node>${nameNode}</name-node>
        <!-- Per-action configuration left disabled by the author:
        <configuration>
            <property>
                <name>hive.metastore.uris</name>
                <value>${hive_metastore_uris}</value>
            </property>
        </configuration>
        -->
        <exec>impala-shell.sh</exec>
        <argument>${hive_db_name}</argument>
        <argument>impala-stats.sql</argument>
        <!-- NOTE(review): this path is hard-coded to one user's home directory,
             so the step depends on that location existing and being readable
             wherever the workflow is deployed. Consider deriving it from the
             application path (for example ${wf:appPath()}/impala-stats.sql)
             once the deployed layout is confirmed. -->
        <argument>/user/giorgos.alexiou/graph/oozie_app/impala-stats.sql</argument>
        <file>impala-shell.sh</file>
        <!-- <param>hive_db_name=${hive_db_name}</param> -->
    </shell>
    <ok to="End"/>
    <error to="Kill"/>
</action>
<!-- 2020-06-11 20:01:14 +02:00 -->
<!-- 2020-04-01 21:05:35 +02:00 -->
<!-- Terminal node reached when Step14 succeeds. -->
<end name="End"/>
</workflow-app>