Changed download.sh, added a Skip step that allows one phase of the workflow not to be executed, and changed the order of the workflow steps

This commit is contained in:
Miriam Baglioni 2021-07-16 15:01:25 +02:00
parent acd6056330
commit c4b18e6ccb
3 changed files with 9 additions and 7 deletions

View File

@ -1,2 +1,2 @@
#!bin/bash
curl -LSs $1 | hdfs dfs -put - $2$3
curl -LSs $1 | hdfs dfs -put - $2/$3

View File

@ -70,7 +70,7 @@
<case to="ResetMagWorkingPath">${wf:conf('resumeFrom') eq 'ResetMagWorkingPath'}</case>
<case to="ConvertMagToDataset">${wf:conf('resumeFrom') eq 'ConvertMagToDataset'}</case>
<case to="PreProcessORCID">${wf:conf('resumeFrom') eq 'PreProcessORCID'}</case>
<default to="DownloadDump"/>
<default to="removeFiles"/> <!-- first action to be done when downloadDump is to be performed -->
</switch>
</decision>
@ -78,6 +78,7 @@
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
</kill>
<action name="DownloadDump">
<shell xmlns="uri:oozie:shell-action:0.2">
<job-tracker>${jobTracker}</job-tracker>
@ -105,7 +106,7 @@
<name-node>${nameNode}</name-node>
<main-class>eu.dnetlib.doiboost.crossref.ExtractCrossrefRecords</main-class>
<arg>--hdfsServerUri</arg><arg>${nameNode}</arg>
<arg>--crossrefFileNameTarGz</arg><arg>${crossrefDumpPath}/crossref.tar.gz</arg>
<arg>--crossrefFileNameTarGz</arg><arg>${crossrefdumpfilename}</arg>
<arg>--workingPath</arg><arg>${crossrefDumpPath}</arg>
<arg>--outputPath</arg><arg>${crossrefDumpPath}/files/</arg>
</java>
@ -161,16 +162,16 @@
<arg>--targetPath</arg><arg>${inputPathCrossref}/crossref_ds</arg>
</spark>
<ok to="removeFiles"/>
<ok to="ResetMagWorkingPath"/>
<error to="Kill"/>
</action>
<action name="removeFiles">
<fs>
<!-- <delete path="${crossrefDumpPath}/files"/>-->
<delete path="${crossrefDumpPath}/files"/>
<delete path="${crossrefDumpPath}/crossref_unpack/"/>
</fs>
<ok to="ResetMagWorkingPath"/>
<ok to="DownloadDump"/>
<error to="Kill"/>
</action>

View File

@ -75,6 +75,7 @@
<decision name="resume_from">
<switch>
<case to="End">${wf:conf('resumeFrom') eq 'Skip'}</case>
<case to="ProcessMAG">${wf:conf('resumeFrom') eq 'PreprocessMag'}</case>
<case to="ProcessUW">${wf:conf('resumeFrom') eq 'PreprocessUW'}</case>
<case to="ProcessORCID">${wf:conf('resumeFrom') eq 'ProcessORCID'}</case>