added global properties, moved postprocessing script inside the oozie_app directory

This commit is contained in:
Claudio Atzori 2020-03-18 15:43:54 +01:00
parent c7e0730720
commit abe8fb69a2
4 changed files with 58 additions and 22 deletions

View File

@ -1,4 +1,5 @@
<workflow-app name="import regular entities as Graph (all steps)" xmlns="uri:oozie:workflow:0.5">
<parameters>
<property>
<name>workingPath</name>
@ -48,6 +49,21 @@
</property>
</parameters>
<global>
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<configuration>
<property>
<name>mapreduce.job.queuename</name>
<value>${queueName}</value>
</property>
<property>
<name>oozie.launcher.mapred.job.queue.name</name>
<value>${oozieLauncherQueueName}</value>
</property>
</configuration>
</global>
<start to="ReuseContent"/>
<kill name="Kill">
@ -73,8 +89,6 @@
<action name="ImportDB">
<java>
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<main-class>eu.dnetlib.dhp.migration.step1.MigrateDbEntitiesApplication</main-class>
<arg>-p</arg><arg>${workingPath}/db_records</arg>
<arg>-pgurl</arg><arg>${postgresURL}</arg>
@ -87,8 +101,6 @@
<action name="ImportODF">
<java>
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<main-class>eu.dnetlib.dhp.migration.step1.MigrateMongoMdstoresApplication</main-class>
<arg>-p</arg><arg>${workingPath}/odf_records</arg>
<arg>-mongourl</arg><arg>${mongoURL}</arg>
@ -103,8 +115,6 @@
<action name="ImportOAF">
<java>
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<main-class>eu.dnetlib.dhp.migration.step1.MigrateMongoMdstoresApplication</main-class>
<arg>-p</arg><arg>${workingPath}/oaf_records</arg>
<arg>-mongourl</arg><arg>${mongoURL}</arg>
@ -127,9 +137,7 @@
<action name="GenerateEntities">
<spark xmlns="uri:oozie:spark-action:0.2">
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<master>yarn-cluster</master>
<master>yarn</master>
<mode>cluster</mode>
<name>GenerateEntities</name>
<class>eu.dnetlib.dhp.migration.step2.GenerateEntitiesApplication</class>
@ -164,9 +172,7 @@
<action name="GenerateGraph">
<spark xmlns="uri:oozie:spark-action:0.2">
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<master>yarn-cluster</master>
<master>yarn</master>
<mode>cluster</mode>
<name>GenerateGraph</name>
<class>eu.dnetlib.dhp.migration.step3.DispatchEntitiesApplication</class>

View File

@ -1,4 +1,5 @@
<workflow-app name="import_infospace_graph" xmlns="uri:oozie:workflow:0.5">
<parameters>
<property>
<name>sourcePath</name>
@ -22,6 +23,21 @@
</property>
</parameters>
<global>
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<configuration>
<property>
<name>mapreduce.job.queuename</name>
<value>${queueName}</value>
</property>
<property>
<name>oozie.launcher.mapred.job.queue.name</name>
<value>${oozieLauncherQueueName}</value>
</property>
</configuration>
</global>
<start to="MapGraphIntoDataFrame"/>
<kill name="Kill">
@ -30,9 +46,7 @@
<action name="MapGraphIntoDataFrame">
<spark xmlns="uri:oozie:spark-action:0.2">
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<master>yarn-cluster</master>
<master>yarn</master>
<mode>cluster</mode>
<name>MapGraphIntoDataFrame</name>
<class>eu.dnetlib.dhp.graph.SparkGraphImporterJob</class>
@ -56,9 +70,13 @@
<action name="PostProcessing">
<hive xmlns="uri:oozie:hive-action:0.2">
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<script>/eu/dnetlib/dhp/graph/hive/postprocessing.sql</script>
<configuration>
<property>
<name>oozie.hive.defaults</name>
<value>hive-site.xml</value>
</property>
</configuration>
<script>lib/scripts/postprocessing.sql</script>
<param>hive_db_name=${hive_db_name}</param>
</hive>
<ok to="End"/>

View File

@ -1,4 +1,5 @@
<workflow-app name="index_infospace_graph" xmlns="uri:oozie:workflow:0.5">
<parameters>
<property>
<name>hive_db_name</name>
@ -26,6 +27,21 @@
</property>
</parameters>
<global>
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<configuration>
<property>
<name>mapreduce.job.queuename</name>
<value>${queueName}</value>
</property>
<property>
<name>oozie.launcher.mapred.job.queue.name</name>
<value>${oozieLauncherQueueName}</value>
</property>
</configuration>
</global>
<start to="reuse_records"/>
<decision name="reuse_records">
@ -42,8 +58,6 @@
<action name="adjancency_lists">
<spark xmlns="uri:oozie:spark-action:0.2">
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<master>yarn</master>
<mode>cluster</mode>
<name>build_adjacency_lists</name>
@ -71,8 +85,6 @@
<action name="to_solr_index">
<spark xmlns="uri:oozie:spark-action:0.2">
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<master>yarn</master>
<mode>cluster</mode>
<name>to_solr_index</name>