added command line params to allow hive actions to run

2022-07-29 16:36:20 +03:00 · 2022-07-29 16:36:20 +03:00 · 6fc9ef53f6
parent 0353f93d54
commit 6fc9ef53f6
3 changed files with 16 additions and 10 deletions
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/contexts.sh
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/contexts.sh
@ -9,6 +9,8 @@ fi
 CONTEXT_API=$1
 TARGET_DB=$2
 export HIVE_OPTS="-hiveconf mapred.job.queue.name=analytics -hiveconf hive.spark.client.connect.timeout=120000ms -hiveconf hive.spark.client.server.connect.timeout=300000ms -hiveconf spark.executor.memory=4831838208 -hiveconf spark.yarn.executor.memoryOverhead=450"
 TMP=/tmp/stats-update-`tr -dc A-Za-z0-9 </dev/urandom | head -c 6`
 echo "Downloading context ids"
@ -29,13 +31,16 @@ hdfs dfs -copyFromLocal categories.csv ${TMP}
 hdfs dfs -copyFromLocal concepts.csv ${TMP}
 hdfs dfs -chmod -R 777 ${TMP}
 export HADOOP_USER="antonis.lempesis"
 export HADOOP_USER_NAME="antonis.lempesis"
 echo "Creating and populating impala tables"
-hive -e "create table ${TARGET_DB}.context (id string, name string) row format delimited fields terminated by ','"
+hive $HIVE_OPTS -e "create table ${TARGET_DB}.context (id string, name string) row format delimited fields terminated by ','"
-hive -e "create table ${TARGET_DB}.category (context string, id string, name string) row format delimited fields terminated by ','"
+hive $HIVE_OPTS -e "create table ${TARGET_DB}.category (context string, id string, name string) row format delimited fields terminated by ','"
-hive -e "create table ${TARGET_DB}.concept (category string, id string, name string) row format delimited fields terminated by ','"
+hive $HIVE_OPTS -e "create table ${TARGET_DB}.concept (category string, id string, name string) row format delimited fields terminated by ','"
-hive -e "load data inpath '${TMP}/contexts.csv' into table ${TARGET_DB}.context"
+hive $HIVE_OPTS -e "load data inpath '${TMP}/contexts.csv' into table ${TARGET_DB}.context"
-hive -e "load data inpath '${TMP}/categories.csv' into table ${TARGET_DB}.category"
+hive $HIVE_OPTS -e "load data inpath '${TMP}/categories.csv' into table ${TARGET_DB}.category"
-hive -e "load data inpath '${TMP}/concepts.csv' into table ${TARGET_DB}.concept"
+hive $HIVE_OPTS -e "load data inpath '${TMP}/concepts.csv' into table ${TARGET_DB}.concept"
 echo "Cleaning up"
 rm concepts.csv
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/indicators.sh
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/indicators.sh
@ -8,7 +8,8 @@ fi
 export TARGET=$1
 export SCRIPT_PATH=$2
-export HIVE_OPTS="-hiveconf mapred.job.queue.name=analytics -hiveconf hive.spark.client.connect.timeout=120000ms -hiveconf hive.spark.client.server.connect.timeout=300000ms"
+export HIVE_OPTS="-hiveconf mapred.job.queue.name=analytics -hiveconf hive.spark.client.connect.timeout=120000ms -hiveconf hive.spark.client.server.connect.timeout=300000ms -hiveconf spark.executor.memory=4831838208 -hiveconf spark.yarn.executor.memoryOverhead=450"
 export HADOOP_USER="antonis.lempesis"
 echo "Getting file from " $SCRIPT_PATH
 hdfs dfs -copyToLocal $SCRIPT_PATH
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml
@ -74,7 +74,7 @@
        </configuration>
    </global>
-    <start to="Step16-createIndicatorsTables"/>
+    <start to="Contexts"/>
    <kill name="Kill">
        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
@ -280,7 +280,7 @@
            <argument>${wf:appPath()}/scripts/step16-createIndicatorsTables.sql</argument>
            <file>indicators.sh</file>
        </shell>
-        <ok to="End"/>
+        <ok to="Step16_1-definitions"/>
        <error to="Kill"/>
    </action>