Workflow now processes the 2 activity files in parallel

This commit is contained in:
Enrico Ottonello 2020-04-24 18:33:37 +02:00
parent 941e94af06
commit a1861b9eaa
2 changed files with 28 additions and 10 deletions

View File

@ -25,11 +25,7 @@ public class OrcidAuthorsDOIsDataGen extends OrcidDSManager {
String tarGzUri = String tarGzUri =
hdfsServerUri.concat(hdfsOrcidDefaultPath).concat(activitiesFileNameTarGz); hdfsServerUri.concat(hdfsOrcidDefaultPath).concat(activitiesFileNameTarGz);
Path outputPath = Path outputPath =
new Path( new Path(hdfsServerUri.concat(hdfsOrcidDefaultPath).concat(outputAuthorsDOIsPath));
hdfsServerUri
.concat(hdfsOrcidDefaultPath)
.concat(outputAuthorsDOIsPath)
.concat("authors_dois.seq"));
ActivitiesDecompressor.parseGzActivities(conf, tarGzUri, outputPath); ActivitiesDecompressor.parseGzActivities(conf, tarGzUri, outputPath);
} }

View File

@ -18,11 +18,16 @@
<delete path='${workingPath_activities}/output'/> <delete path='${workingPath_activities}/output'/>
<mkdir path='${workingPath_activities}/output'/> <mkdir path='${workingPath_activities}/output'/>
</fs> </fs>
<ok to="GenerateOrcidAuthorsDOIsData"/> <ok to="fork_generate_orcid_authors_dois"/>
<error to="Kill"/> <error to="Kill"/>
</action> </action>
<action name="GenerateOrcidAuthorsDOIsData"> <fork name = "fork_generate_orcid_authors_dois">
<path start = "Generate_Orcid_Authors_DOIs_0"/>
<path start = "Generate_Orcid_Authors_DOIs_1"/>
</fork>
<action name="Generate_Orcid_Authors_DOIs_0">
<java> <java>
<job-tracker>${jobTracker}</job-tracker> <job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node> <name-node>${nameNode}</name-node>
@ -30,10 +35,27 @@
<arg>-d</arg><arg>${workingPath_activities}/</arg> <arg>-d</arg><arg>${workingPath_activities}/</arg>
<arg>-n</arg><arg>${nameNode}</arg> <arg>-n</arg><arg>${nameNode}</arg>
<arg>-f</arg><arg>ORCID_2019_activites_0.tar.gz</arg> <arg>-f</arg><arg>ORCID_2019_activites_0.tar.gz</arg>
<arg>-o</arg><arg>output/</arg> <arg>-o</arg><arg>output/authors_dois_0.seq</arg>
</java> </java>
<ok to="End"/> <ok to="join_node"/>
<error to="Kill"/> <error to="Kill"/>
</action> </action>
<end name="End"/>
<action name="Generate_Orcid_Authors_DOIs_1">
<java>
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<main-class>eu.dnetlib.doiboost.orcid.OrcidAuthorsDOIsDataGen</main-class>
<arg>-d</arg><arg>${workingPath_activities}/</arg>
<arg>-n</arg><arg>${nameNode}</arg>
<arg>-f</arg><arg>ORCID_2019_activites_1.tar.gz</arg>
<arg>-o</arg><arg>output/authors_dois_1.seq</arg>
</java>
<ok to="join_node"/>
<error to="Kill"/>
</action>
<join name = "join_node" to = "End"/>
<end name="End"/>
</workflow-app> </workflow-app>