Changed the workflow so that the programme and project preparation steps can also run in parallel

This commit is contained in:
Miriam Baglioni 2021-05-21 14:41:57 +02:00
parent 073d76864d
commit c844877de2
1 changed file with 17 additions and 8 deletions

View File

@ -41,11 +41,17 @@
<fork name="fork_get_info"> <fork name="fork_get_info">
<path start="get_project_file"/> <path start="fork_get_projects"/>
<path start="get_programme_file"/> <path start="get_programme_file"/>
<path start="get_topic_file"/> <path start="get_topic_file"/>
</fork>
<fork name="fork_get_projects">
<path start="get_project_file"/>
<path start="read_projects"/> <path start="read_projects"/>
</fork> </fork>
<action name="get_project_file"> <action name="get_project_file">
<java> <java>
<main-class>eu.dnetlib.dhp.actionmanager.project.utils.ReadCSV</main-class> <main-class>eu.dnetlib.dhp.actionmanager.project.utils.ReadCSV</main-class>
@ -54,7 +60,7 @@
<arg>--hdfsPath</arg><arg>${workingDir}/projects</arg> <arg>--hdfsPath</arg><arg>${workingDir}/projects</arg>
<arg>--classForName</arg><arg>eu.dnetlib.dhp.actionmanager.project.utils.CSVProject</arg> <arg>--classForName</arg><arg>eu.dnetlib.dhp.actionmanager.project.utils.CSVProject</arg>
</java> </java>
<ok to="wait"/> <ok to="wait_projects"/>
<error to="Kill"/> <error to="Kill"/>
</action> </action>
@ -66,7 +72,7 @@
<arg>--hdfsPath</arg><arg>${workingDir}/programme</arg> <arg>--hdfsPath</arg><arg>${workingDir}/programme</arg>
<arg>--classForName</arg><arg>eu.dnetlib.dhp.actionmanager.project.utils.CSVProgramme</arg> <arg>--classForName</arg><arg>eu.dnetlib.dhp.actionmanager.project.utils.CSVProgramme</arg>
</java> </java>
<ok to="wait"/> <ok to="prepare_programme"/>
<error to="Kill"/> <error to="Kill"/>
</action> </action>
@ -92,12 +98,10 @@
<arg>--postgresUser</arg><arg>${postgresUser}</arg> <arg>--postgresUser</arg><arg>${postgresUser}</arg>
<arg>--postgresPassword</arg><arg>${postgresPassword}</arg> <arg>--postgresPassword</arg><arg>${postgresPassword}</arg>
</java> </java>
<ok to="wait"/> <ok to="wait_projects"/>
<error to="Kill"/> <error to="Kill"/>
</action> </action>
<join name="wait" to="prepare_programme"/>
<action name="prepare_programme"> <action name="prepare_programme">
<spark xmlns="uri:oozie:spark-action:0.2"> <spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master> <master>yarn</master>
@ -118,10 +122,15 @@
<arg>--programmePath</arg><arg>${workingDir}/programme</arg> <arg>--programmePath</arg><arg>${workingDir}/programme</arg>
<arg>--outputPath</arg><arg>${workingDir}/preparedProgramme</arg> <arg>--outputPath</arg><arg>${workingDir}/preparedProgramme</arg>
</spark> </spark>
<ok to="prepare_project"/> <ok to="wait"/>
<error to="Kill"/> <error to="Kill"/>
</action> </action>
<join name="wait" to="create_updates"/>
<join name="wait_projects" to="prepare_project"/>
<action name="prepare_project"> <action name="prepare_project">
<spark xmlns="uri:oozie:spark-action:0.2"> <spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master> <master>yarn</master>
@ -143,7 +152,7 @@
<arg>--outputPath</arg><arg>${workingDir}/preparedProjects</arg> <arg>--outputPath</arg><arg>${workingDir}/preparedProjects</arg>
<arg>--dbProjectPath</arg><arg>${workingDir}/dbProjects</arg> <arg>--dbProjectPath</arg><arg>${workingDir}/dbProjects</arg>
</spark> </spark>
<ok to="create_updates"/> <ok to="wait"/>
<error to="Kill"/> <error to="Kill"/>
</action> </action>