forked from D-Net/dnet-hadoop
new script to download nquads from the scraping service
This commit is contained in:
parent
18c9b95cb1
commit
64bc955444
|
@ -2,12 +2,12 @@
|
|||
<parameters>
|
||||
<property>
|
||||
<name>mainPath</name>
|
||||
<value>/data/bioschema/ped</value>
|
||||
<value>/data/bioschema/mobidb</value>
|
||||
<description>the working path of Bioschema stores</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>datasourceKey</name>
|
||||
<value>ped</value>
|
||||
<value>mobidb</value>
|
||||
<description>the key that identifies the datasource (e.g. ped, disprot, mobidb)</description>
|
||||
</property>
|
||||
<property>
|
||||
|
|
|
@ -0,0 +1,3 @@
|
|||
#!/bin/sh
# Fetches the N-Quads dump of one datasource from the bioschemas scraping API
# and uploads it to HDFS.
#
# Usage: download_nquads.sh <datasourceKey> [hdfsTargetDir]
#   datasourceKey  value sent as the datasourceKey query parameter
#   hdfsTargetDir  HDFS destination; defaults to /data/bioschema/mobidb
#
# Exits non-zero as soon as any step fails, so the caller sees the real
# failure instead of the exit status of the final cleanup step.
set -eu

datasource_key="${1:?usage: download_nquads.sh <datasourceKey> [hdfsTargetDir]}"
target_dir="${2:-/data/bioschema/mobidb}"
tmp_file=/tmp/base64_gzipped_nquads.txt

# Remove the local copy on every exit path, including failures.
trap 'rm -f "$tmp_file"' EXIT

# The URL is quoted so that '?' is never treated as a glob character and the
# datasource key is not subject to word splitting.
wget -O "$tmp_file" "https://hadoop-bioschemas-ds.garr-pa1.d4science.org/bioschemas-api/api/getNQuads?datasourceKey=${datasource_key}"

# -f overwrites a file left behind by a previous run instead of failing.
hdfs dfs -copyFromLocal -f "$tmp_file" "$target_dir"
|
|
@ -1,5 +1,10 @@
|
|||
<workflow-app name="RdfConverter" xmlns="uri:oozie:workflow:0.5">
|
||||
<parameters>
|
||||
<property>
|
||||
<name>bioschemas_datasource_key</name>
|
||||
<value>mobidb</value>
|
||||
<description>bioschemas datasource key (e.g. mobidb, ped, disprot)</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>workingPath</name>
|
||||
<value>/data/bioschema/mobidb/</value>
|
||||
|
@ -57,11 +62,24 @@
|
|||
</property>
|
||||
</parameters>
|
||||
|
||||
<start to="ResetWorkingPath"/>
|
||||
<start to="DownloadNQuads"/>
|
||||
<!-- Terminal failure node: ends the workflow and reports the error message of the last node that failed. -->
<kill name="Kill">
|
||||
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
||||
</kill>
|
||||
|
||||
<!-- Shell action that runs download_nquads.sh (shipped with the job through the file element),
     passing the bioschemas_datasource_key parameter as its single argument.
     Continues to ResetWorkingPath on success and to Kill on error. -->
<action name="DownloadNQuads">
|
||||
<shell xmlns="uri:oozie:shell-action:0.1">
|
||||
<job-tracker>${jobTracker}</job-tracker>
|
||||
<name-node>${nameNode}</name-node>
|
||||
<exec>download_nquads.sh</exec>
|
||||
<argument>${bioschemas_datasource_key}</argument>
|
||||
<file>download_nquads.sh</file>
|
||||
<capture-output/>
|
||||
</shell>
|
||||
<ok to="ResetWorkingPath"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="ResetWorkingPath">
|
||||
<fs>
|
||||
<delete path='${workingPath}${output}'/>
|
||||
|
|
Loading…
Reference in New Issue