From a1861b9eaa7570c890877ec8e142a49bbebfb12b Mon Sep 17 00:00:00 2001 From: Enrico Ottonello Date: Fri, 24 Apr 2020 18:33:37 +0200 Subject: [PATCH] workflow works in parallel on 2 activity files --- .../orcid/OrcidAuthorsDOIsDataGen.java | 6 +--- .../oozie_app/workflow.xml | 32 ++++++++++++++++--- 2 files changed, 28 insertions(+), 10 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/OrcidAuthorsDOIsDataGen.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/OrcidAuthorsDOIsDataGen.java index 7596cf67f..c7c1d75cf 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/OrcidAuthorsDOIsDataGen.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/OrcidAuthorsDOIsDataGen.java @@ -25,11 +25,7 @@ public class OrcidAuthorsDOIsDataGen extends OrcidDSManager { String tarGzUri = hdfsServerUri.concat(hdfsOrcidDefaultPath).concat(activitiesFileNameTarGz); Path outputPath = - new Path( - hdfsServerUri - .concat(hdfsOrcidDefaultPath) - .concat(outputAuthorsDOIsPath) - .concat("authors_dois.seq")); + new Path(hdfsServerUri.concat(hdfsOrcidDefaultPath).concat(outputAuthorsDOIsPath)); ActivitiesDecompressor.parseGzActivities(conf, tarGzUri, outputPath); } diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_authors_dois_data/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_authors_dois_data/oozie_app/workflow.xml index 5d7222d07..3dabb765b 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_authors_dois_data/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_authors_dois_data/oozie_app/workflow.xml @@ -18,11 +18,16 @@ - + - + + + + + + ${jobTracker} ${nameNode} @@ -30,10 +35,27 @@ -d${workingPath_activities}/ -n${nameNode} -fORCID_2019_activites_0.tar.gz - -ooutput/ + -ooutput/authors_dois_0.seq - + - + + + + ${jobTracker} + ${nameNode} + eu.dnetlib.doiboost.orcid.OrcidAuthorsDOIsDataGen + -d${workingPath_activities}/ + -n${nameNode} + -fORCID_2019_activites_1.tar.gz + -ooutput/authors_dois_1.seq + + + + + + + + \ No newline at end of file