new script to download nquads from the scraping service

This commit is contained in:
Enrico Ottonello 2022-07-19 12:12:01 +02:00
parent 18c9b95cb1
commit 64bc955444
3 changed files with 24 additions and 3 deletions

View File

@ -2,12 +2,12 @@
<parameters> <parameters>
<property> <property>
<name>mainPath</name> <name>mainPath</name>
<value>/data/bioschema/ped</value> <value>/data/bioschema/mobidb</value>
<description>the working path of Bioschema stores</description> <description>the working path of Bioschema stores</description>
</property> </property>
<property> <property>
<name>datasourceKey</name> <name>datasourceKey</name>
<value>ped</value> <value>mobidb</value>
<description>the key that identifies the datasource (eg ped, disprot, mobidb)</description> <description>the key that identifies the datasource (eg ped, disprot, mobidb)</description>
</property> </property>
<property> <property>

View File

@ -0,0 +1,3 @@
#!/bin/sh
# Fetch the N-Quads dump of one Bioschemas datasource from the scraping
# service and upload it to HDFS as base64_gzipped_nquads.txt.
#
# Usage: download_nquads.sh <datasourceKey> [hdfsTargetDir]
#   datasourceKey  key sent to the getNQuads endpoint (e.g. mobidb, ped, disprot)
#   hdfsTargetDir  HDFS directory that receives the file; defaults to
#                  /data/bioschema/<datasourceKey>
#
# Exits non-zero as soon as any step fails, so a failed download is never
# uploaded and the calling workflow can react to the error.
set -eu

datasource_key="${1:?usage: download_nquads.sh <datasourceKey> [hdfsTargetDir]}"
target_dir="${2:-/data/bioschema/${datasource_key}}"

# Use a private scratch directory so concurrent runs cannot clobber each
# other's download; it is removed on every exit path, including failures.
scratch_dir="$(mktemp -d)"
trap 'rm -rf "$scratch_dir"' EXIT
local_file="${scratch_dir}/base64_gzipped_nquads.txt"

# The URL is quoted so the shell never treats '?' as a glob character.
wget -O "$local_file" "https://hadoop-bioschemas-ds.garr-pa1.d4science.org/bioschemas-api/api/getNQuads?datasourceKey=${datasource_key}"

# -f overwrites a copy left by a previous run instead of failing on it.
hdfs dfs -copyFromLocal -f "$local_file" "$target_dir"

View File

@ -1,5 +1,10 @@
<workflow-app name="RdfConverter" xmlns="uri:oozie:workflow:0.5"> <workflow-app name="RdfConverter" xmlns="uri:oozie:workflow:0.5">
<parameters> <parameters>
<!-- Key of the Bioschemas datasource to process; the DownloadNQuads action
     passes it to download_nquads.sh as its argument. -->
<property>
<name>bioschemas_datasource_key</name>
<value>mobidb</value>
<description>bioschemas datasource key (i.e. mobidb, ped, disprot)</description>
</property>
<property> <property>
<name>workingPath</name> <name>workingPath</name>
<value>/data/bioschema/mobidb/</value> <value>/data/bioschema/mobidb/</value>
@ -57,11 +62,24 @@
</property> </property>
</parameters> </parameters>
<start to="ResetWorkingPath"/> <start to="DownloadNQuads"/>
<kill name="Kill"> <kill name="Kill">
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message> <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
</kill> </kill>
<!--
  DownloadNQuads: entry action of the workflow (the start node points here).
  Runs download_nquads.sh with ${bioschemas_datasource_key} as its only
  argument. On success the workflow continues with ResetWorkingPath; on
  error it transitions to Kill.
  NOTE(review): nothing in the visible part of the workflow reads the output
  captured by capture-output; confirm it is actually needed.
-->
<action name="DownloadNQuads">
<shell xmlns="uri:oozie:shell-action:0.1">
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<exec>download_nquads.sh</exec>
<argument>${bioschemas_datasource_key}</argument>
<file>download_nquads.sh</file>
<capture-output/>
</shell>
<ok to="ResetWorkingPath"/>
<error to="Kill"/>
</action>
<action name="ResetWorkingPath"> <action name="ResetWorkingPath">
<fs> <fs>
<delete path='${workingPath}${output}'/> <delete path='${workingPath}${output}'/>