From 64bc955444d2acd8fc9afbec3224ff9305b9a287 Mon Sep 17 00:00:00 2001 From: Enrico Ottonello Date: Tue, 19 Jul 2022 12:12:01 +0200 Subject: [PATCH] new script to nquads download from scraping service --- .../dhp/bioschema/oozie_app/workflow.xml | 4 ++-- .../bioschema/oozie_app/download_nquads.sh | 3 +++ .../bioschema/oozie_app/workflow.xml | 20 ++++++++++++++++++- 3 files changed, 24 insertions(+), 3 deletions(-) create mode 100644 dhp-workflows/dhp-rdfconverter/src/main/resources/eu/dnetlib/dhp/rdfconverter/bioschema/oozie_app/download_nquads.sh diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/bioschema/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/bioschema/oozie_app/workflow.xml index 43f39f6c2..deacb878b 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/bioschema/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/bioschema/oozie_app/workflow.xml @@ -2,12 +2,12 @@ mainPath - /data/bioschema/ped + /data/bioschema/mobidb the working path of Bioschema stores datasourceKey - ped + mobidb the key that identifies the datasource (eg ped, disprot, mobidb) diff --git a/dhp-workflows/dhp-rdfconverter/src/main/resources/eu/dnetlib/dhp/rdfconverter/bioschema/oozie_app/download_nquads.sh b/dhp-workflows/dhp-rdfconverter/src/main/resources/eu/dnetlib/dhp/rdfconverter/bioschema/oozie_app/download_nquads.sh new file mode 100644 index 000000000..019241fa7 --- /dev/null +++ b/dhp-workflows/dhp-rdfconverter/src/main/resources/eu/dnetlib/dhp/rdfconverter/bioschema/oozie_app/download_nquads.sh @@ -0,0 +1,3 @@ +wget -O /tmp/base64_gzipped_nquads.txt https://hadoop-bioschemas-ds.garr-pa1.d4science.org/bioschemas-api/api/getNQuads?datasourceKey=$1 +hdfs dfs -copyFromLocal /tmp/base64_gzipped_nquads.txt /data/bioschema/mobidb +rm -f /tmp/base64_gzipped_nquads.txt \ No newline at end of file diff --git a/dhp-workflows/dhp-rdfconverter/src/main/resources/eu/dnetlib/dhp/rdfconverter/bioschema/oozie_app/workflow.xml b/dhp-workflows/dhp-rdfconverter/src/main/resources/eu/dnetlib/dhp/rdfconverter/bioschema/oozie_app/workflow.xml index 821fbf9bd..7305a2740 100644 --- a/dhp-workflows/dhp-rdfconverter/src/main/resources/eu/dnetlib/dhp/rdfconverter/bioschema/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-rdfconverter/src/main/resources/eu/dnetlib/dhp/rdfconverter/bioschema/oozie_app/workflow.xml @@ -1,5 +1,10 @@ + + bioschemas_datasource_key + mobidb + bioschemas datasource key (i.e. mobidb, ped, disprot) + workingPath /data/bioschema/mobidb/ @@ -57,11 +62,24 @@ - + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + ${jobTracker} + ${nameNode} + download_nquads.sh + ${bioschemas_datasource_key} + download_nquads.sh + + + + + +