new script to download nquads from the scraping service

This commit is contained in:
Enrico Ottonello 2022-07-19 12:12:01 +02:00
parent 18c9b95cb1
commit 64bc955444
3 changed files with 24 additions and 3 deletions

View File

@ -2,12 +2,12 @@
<parameters>
<property>
<name>mainPath</name>
<value>/data/bioschema/ped</value>
<value>/data/bioschema/mobidb</value>
<description>the working path of Bioschema stores</description>
</property>
<property>
<name>datasourceKey</name>
<value>ped</value>
<value>mobidb</value>
<description>the key that identifies the datasource (e.g. ped, disprot, mobidb)</description>
</property>
<property>

View File

@ -0,0 +1,3 @@
#!/bin/bash
# Download the N-Quads dump of one datasource from the bioschemas scraping
# service and copy it into HDFS.
#
# Usage: download_nquads.sh <datasourceKey> [hdfsTargetPath]
#   datasourceKey   value sent as the datasourceKey query parameter of the
#                   getNQuads endpoint (e.g. mobidb, ped, disprot)
#   hdfsTargetPath  HDFS destination of the downloaded file;
#                   defaults to /data/bioschema/mobidb
#
# Exits non-zero as soon as the download or the HDFS copy fails, so that a
# failed download never pushes an empty or partial file to HDFS.
set -euo pipefail

datasource_key="${1:?usage: download_nquads.sh <datasourceKey> [hdfsTargetPath]}"
hdfs_target="${2:-/data/bioschema/mobidb}"

# Private scratch directory: concurrent runs cannot clobber each other's
# download, and the trap removes it on every exit path (success or failure).
work_dir="$(mktemp -d)"
trap 'rm -rf "$work_dir"' EXIT

# The local file name is kept unchanged because it becomes the HDFS file name
# when the target path is an existing directory.
local_file="${work_dir}/base64_gzipped_nquads.txt"

# The URL is quoted: it contains '?', which the shell would otherwise treat as
# a glob character, and the key must not be word-split.
wget -O "$local_file" "https://hadoop-bioschemas-ds.garr-pa1.d4science.org/bioschemas-api/api/getNQuads?datasourceKey=${datasource_key}"

# -f overwrites a file left behind by a previous run instead of failing.
hdfs dfs -copyFromLocal -f "$local_file" "$hdfs_target"

View File

@ -1,5 +1,10 @@
<workflow-app name="RdfConverter" xmlns="uri:oozie:workflow:0.5">
<parameters>
<property>
<name>bioschemas_datasource_key</name>
<value>mobidb</value>
<description>bioschemas datasource key (e.g. mobidb, ped, disprot)</description>
</property>
<property>
<name>workingPath</name>
<value>/data/bioschema/mobidb/</value>
@ -57,11 +62,24 @@
</property>
</parameters>
<start to="ResetWorkingPath"/>
<start to="DownloadNQuads"/>
<!-- Terminal failure node: actions route here through their <error> transition.
     The message reports the error of the last node that failed. -->
<kill name="Kill">
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
</kill>
<!-- Shell action that runs download_nquads.sh with the bioschemas datasource key
     as its single argument. The script is shipped with the job through <file>.
     On success the workflow continues with ResetWorkingPath; on error it goes to Kill. -->
<action name="DownloadNQuads">
    <shell xmlns="uri:oozie:shell-action:0.1">
        <job-tracker>${jobTracker}</job-tracker>
        <name-node>${nameNode}</name-node>
        <exec>download_nquads.sh</exec>
        <argument>${bioschemas_datasource_key}</argument>
        <file>download_nquads.sh</file>
        <capture-output/>
    </shell>
    <ok to="ResetWorkingPath"/>
    <error to="Kill"/>
</action>
<action name="ResetWorkingPath">
<fs>
<delete path='${workingPath}${output}'/>