forked from D-Net/dnet-hadoop
Changed download.sh, added a 'Skip' option for resumeFrom so that a phase can be skipped entirely, and changed the order of the workflow steps
This commit is contained in:
parent
acd6056330
commit
c4b18e6ccb
|
@ -1,2 +1,2 @@
|
||||||
#!bin/bash
|
#!bin/bash
|
||||||
curl -LSs $1 | hdfs dfs -put - $2$3
|
curl -LSs $1 | hdfs dfs -put - $2/$3
|
|
@ -70,7 +70,7 @@
|
||||||
<case to="ResetMagWorkingPath">${wf:conf('resumeFrom') eq 'ResetMagWorkingPath'}</case>
|
<case to="ResetMagWorkingPath">${wf:conf('resumeFrom') eq 'ResetMagWorkingPath'}</case>
|
||||||
<case to="ConvertMagToDataset">${wf:conf('resumeFrom') eq 'ConvertMagToDataset'}</case>
|
<case to="ConvertMagToDataset">${wf:conf('resumeFrom') eq 'ConvertMagToDataset'}</case>
|
||||||
<case to="PreProcessORCID">${wf:conf('resumeFrom') eq 'PreProcessORCID'}</case>
|
<case to="PreProcessORCID">${wf:conf('resumeFrom') eq 'PreProcessORCID'}</case>
|
||||||
<default to="DownloadDump"/>
|
<default to="removeFiles"/> <!-- first action to be done when downloadDump is to be performed -->
|
||||||
</switch>
|
</switch>
|
||||||
</decision>
|
</decision>
|
||||||
|
|
||||||
|
@ -78,6 +78,7 @@
|
||||||
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
||||||
</kill>
|
</kill>
|
||||||
|
|
||||||
|
|
||||||
<action name="DownloadDump">
|
<action name="DownloadDump">
|
||||||
<shell xmlns="uri:oozie:shell-action:0.2">
|
<shell xmlns="uri:oozie:shell-action:0.2">
|
||||||
<job-tracker>${jobTracker}</job-tracker>
|
<job-tracker>${jobTracker}</job-tracker>
|
||||||
|
@ -105,7 +106,7 @@
|
||||||
<name-node>${nameNode}</name-node>
|
<name-node>${nameNode}</name-node>
|
||||||
<main-class>eu.dnetlib.doiboost.crossref.ExtractCrossrefRecords</main-class>
|
<main-class>eu.dnetlib.doiboost.crossref.ExtractCrossrefRecords</main-class>
|
||||||
<arg>--hdfsServerUri</arg><arg>${nameNode}</arg>
|
<arg>--hdfsServerUri</arg><arg>${nameNode}</arg>
|
||||||
<arg>--crossrefFileNameTarGz</arg><arg>${crossrefDumpPath}/crossref.tar.gz</arg>
|
<arg>--crossrefFileNameTarGz</arg><arg>${crossrefdumpfilename}</arg>
|
||||||
<arg>--workingPath</arg><arg>${crossrefDumpPath}</arg>
|
<arg>--workingPath</arg><arg>${crossrefDumpPath}</arg>
|
||||||
<arg>--outputPath</arg><arg>${crossrefDumpPath}/files/</arg>
|
<arg>--outputPath</arg><arg>${crossrefDumpPath}/files/</arg>
|
||||||
</java>
|
</java>
|
||||||
|
@ -161,16 +162,16 @@
|
||||||
<arg>--targetPath</arg><arg>${inputPathCrossref}/crossref_ds</arg>
|
<arg>--targetPath</arg><arg>${inputPathCrossref}/crossref_ds</arg>
|
||||||
|
|
||||||
</spark>
|
</spark>
|
||||||
<ok to="removeFiles"/>
|
<ok to="ResetMagWorkingPath"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
</action>
|
</action>
|
||||||
|
|
||||||
<action name="removeFiles">
|
<action name="removeFiles">
|
||||||
<fs>
|
<fs>
|
||||||
<!-- <delete path="${crossrefDumpPath}/files"/>-->
|
<delete path="${crossrefDumpPath}/files"/>
|
||||||
<delete path="${crossrefDumpPath}/crossref_unpack/"/>
|
<delete path="${crossrefDumpPath}/crossref_unpack/"/>
|
||||||
</fs>
|
</fs>
|
||||||
<ok to="ResetMagWorkingPath"/>
|
<ok to="DownloadDump"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
</action>
|
</action>
|
||||||
|
|
||||||
|
|
|
@ -75,6 +75,7 @@
|
||||||
|
|
||||||
<decision name="resume_from">
|
<decision name="resume_from">
|
||||||
<switch>
|
<switch>
|
||||||
|
<case to="End">${wf:conf('resumeFrom') eq 'Skip'}</case>
|
||||||
<case to="ProcessMAG">${wf:conf('resumeFrom') eq 'PreprocessMag'}</case>
|
<case to="ProcessMAG">${wf:conf('resumeFrom') eq 'PreprocessMag'}</case>
|
||||||
<case to="ProcessUW">${wf:conf('resumeFrom') eq 'PreprocessUW'}</case>
|
<case to="ProcessUW">${wf:conf('resumeFrom') eq 'PreprocessUW'}</case>
|
||||||
<case to="ProcessORCID">${wf:conf('resumeFrom') eq 'ProcessORCID'}</case>
|
<case to="ProcessORCID">${wf:conf('resumeFrom') eq 'ProcessORCID'}</case>
|
||||||
|
|
Loading…
Reference in New Issue