Changed download.sh, added a Skip step that allows one phase to be skipped, and changed the sequence of steps in the workflow

This commit is contained in:
Miriam Baglioni 2021-07-16 15:01:25 +02:00
parent acd6056330
commit c4b18e6ccb
3 changed files with 9 additions and 7 deletions

View File

@ -1,2 +1,2 @@
#!bin/bash #!bin/bash
curl -LSs $1 | hdfs dfs -put - $2$3 curl -LSs $1 | hdfs dfs -put - $2/$3

View File

@ -70,7 +70,7 @@
<case to="ResetMagWorkingPath">${wf:conf('resumeFrom') eq 'ResetMagWorkingPath'}</case> <case to="ResetMagWorkingPath">${wf:conf('resumeFrom') eq 'ResetMagWorkingPath'}</case>
<case to="ConvertMagToDataset">${wf:conf('resumeFrom') eq 'ConvertMagToDataset'}</case> <case to="ConvertMagToDataset">${wf:conf('resumeFrom') eq 'ConvertMagToDataset'}</case>
<case to="PreProcessORCID">${wf:conf('resumeFrom') eq 'PreProcessORCID'}</case> <case to="PreProcessORCID">${wf:conf('resumeFrom') eq 'PreProcessORCID'}</case>
<default to="DownloadDump"/> <default to="removeFiles"/> <!-- first action to be done when downloadDump is to be performed -->
</switch> </switch>
</decision> </decision>
@ -78,6 +78,7 @@
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message> <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
</kill> </kill>
<action name="DownloadDump"> <action name="DownloadDump">
<shell xmlns="uri:oozie:shell-action:0.2"> <shell xmlns="uri:oozie:shell-action:0.2">
<job-tracker>${jobTracker}</job-tracker> <job-tracker>${jobTracker}</job-tracker>
@ -105,7 +106,7 @@
<name-node>${nameNode}</name-node> <name-node>${nameNode}</name-node>
<main-class>eu.dnetlib.doiboost.crossref.ExtractCrossrefRecords</main-class> <main-class>eu.dnetlib.doiboost.crossref.ExtractCrossrefRecords</main-class>
<arg>--hdfsServerUri</arg><arg>${nameNode}</arg> <arg>--hdfsServerUri</arg><arg>${nameNode}</arg>
<arg>--crossrefFileNameTarGz</arg><arg>${crossrefDumpPath}/crossref.tar.gz</arg> <arg>--crossrefFileNameTarGz</arg><arg>${crossrefdumpfilename}</arg>
<arg>--workingPath</arg><arg>${crossrefDumpPath}</arg> <arg>--workingPath</arg><arg>${crossrefDumpPath}</arg>
<arg>--outputPath</arg><arg>${crossrefDumpPath}/files/</arg> <arg>--outputPath</arg><arg>${crossrefDumpPath}/files/</arg>
</java> </java>
@ -161,16 +162,16 @@
<arg>--targetPath</arg><arg>${inputPathCrossref}/crossref_ds</arg> <arg>--targetPath</arg><arg>${inputPathCrossref}/crossref_ds</arg>
</spark> </spark>
<ok to="removeFiles"/> <ok to="ResetMagWorkingPath"/>
<error to="Kill"/> <error to="Kill"/>
</action> </action>
<action name="removeFiles"> <action name="removeFiles">
<fs> <fs>
<!-- <delete path="${crossrefDumpPath}/files"/>--> <delete path="${crossrefDumpPath}/files"/>
<delete path="${crossrefDumpPath}/crossref_unpack/"/> <delete path="${crossrefDumpPath}/crossref_unpack/"/>
</fs> </fs>
<ok to="ResetMagWorkingPath"/> <ok to="DownloadDump"/>
<error to="Kill"/> <error to="Kill"/>
</action> </action>

View File

@ -75,6 +75,7 @@
<decision name="resume_from"> <decision name="resume_from">
<switch> <switch>
<case to="End">${wf:conf('resumeFrom') eq 'Skip'}</case>
<case to="ProcessMAG">${wf:conf('resumeFrom') eq 'PreprocessMag'}</case> <case to="ProcessMAG">${wf:conf('resumeFrom') eq 'PreprocessMag'}</case>
<case to="ProcessUW">${wf:conf('resumeFrom') eq 'PreprocessUW'}</case> <case to="ProcessUW">${wf:conf('resumeFrom') eq 'PreprocessUW'}</case>
<case to="ProcessORCID">${wf:conf('resumeFrom') eq 'ProcessORCID'}</case> <case to="ProcessORCID">${wf:conf('resumeFrom') eq 'ProcessORCID'}</case>