Added a shell action that automatically downloads the new dump and stores it in a specified HDFS location

This commit is contained in:
Miriam Baglioni 2021-07-16 12:47:10 +02:00
parent bf9e0d2d4f
commit acd6056330
2 changed files with 26 additions and 1 deletions

View File

@ -0,0 +1,2 @@
#!/bin/bash
# Streams a remote file straight into HDFS without staging it on local disk.
#
# Usage: download.sh <url> <hdfs_path_prefix> [file_name]
#   $1  URL to download (redirects are followed)
#   $2  destination path prefix in HDFS
#   $3  optional file name, appended to $2 with no separator inserted
#
# Exits non-zero when an argument is missing or when either the download
# or the HDFS write fails.

# -e / -u: abort on command failure and on use of an unset variable.
# pipefail: without it the pipeline's exit status is only that of `hdfs`,
# so a failed download would be reported as success.
set -euo pipefail

usage='usage: download.sh <url> <hdfs_path_prefix> [file_name]'
source_url="${1:?$usage}"
# $3 stays optional so callers that pass the full target path in $2 keep working.
target_path="${2:?$usage}${3:-}"

# -f makes curl fail on HTTP error statuses instead of piping the server's
# error page into HDFS as if it were the dump.
curl -fLSs "$source_url" | hdfs dfs -put - "$target_path"

View File

@ -63,12 +63,14 @@
<!-- Selects the node the workflow starts from, based on the 'resumeFrom' workflow property. -->
<decision name="resume_from">
    <switch>
        <case to="End">${wf:conf('resumeFrom') eq 'Skip'}</case>
        <case to="ImportCrossRef">${wf:conf('resumeFrom') eq 'ImportCrossRef'}</case>
        <case to="UnpackCrossrefEntries">${wf:conf('resumeFrom') eq 'UnpackCrossrefEntries'}</case>
        <case to="GenerateCrossrefDataset">${wf:conf('resumeFrom') eq 'GenerateCrossrefDataset'}</case>
        <case to="ResetMagWorkingPath">${wf:conf('resumeFrom') eq 'ResetMagWorkingPath'}</case>
        <case to="ConvertMagToDataset">${wf:conf('resumeFrom') eq 'ConvertMagToDataset'}</case>
        <case to="PreProcessORCID">${wf:conf('resumeFrom') eq 'PreProcessORCID'}</case>
        <!-- A switch takes exactly one default. When no case matches, start from the
             DownloadDump action, which itself transitions to ImportCrossRef on success. -->
        <default to="DownloadDump"/>
    </switch>
</decision>
@ -76,6 +78,27 @@
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
</kill>
<!-- Runs download.sh as an Oozie shell action; continues with ImportCrossRef
     on success and goes to the Kill node on failure. -->
<action name="DownloadDump">
    <shell xmlns="uri:oozie:shell-action:0.2">
        <job-tracker>${jobTracker}</job-tracker>
        <name-node>${nameNode}</name-node>
        <configuration>
            <property>
                <name>mapred.job.queue.name</name>
                <value>${queueName}</value>
            </property>
        </configuration>

        <!-- Positional arguments handed to the script, in order:
             source URL, HDFS path, file name. -->
        <exec>download.sh</exec>
        <argument>${url}</argument>
        <argument>${crossrefDumpPath}</argument>
        <argument>${crossrefdumpfilename}</argument>

        <!-- Makes the script available to the action under the name given in <exec>. -->
        <file>download.sh</file>
        <capture-output/>
    </shell>
    <ok to="ImportCrossRef"/>
    <error to="Kill"/>
</action>
<action name="ImportCrossRef">
<java>
<job-tracker>${jobTracker}</job-tracker>