From 90807b60c7f71a944dde611862cdabc4266c1646 Mon Sep 17 00:00:00 2001 From: dimitrispie Date: Mon, 20 Feb 2023 10:42:24 +0200 Subject: [PATCH] Changes to monitor wf --- .../oozie_app/copyDataToImpalaCluster.sh | 12 ++---- .../oozie_app/finalizeImpalaCluster.sh | 6 --- .../graph/monitor/oozie_app/monitor-post.sh | 20 ++++----- .../dhp/oa/graph/monitor/oozie_app/monitor.sh | 3 +- .../oa/graph/monitor/oozie_app/workflow.xml | 42 ++++++++++++++++++- 5 files changed, 53 insertions(+), 30 deletions(-) diff --git a/dhp-workflows/dhp-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/monitor/oozie_app/copyDataToImpalaCluster.sh b/dhp-workflows/dhp-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/monitor/oozie_app/copyDataToImpalaCluster.sh index 843877c90..c05d8342a 100644 --- a/dhp-workflows/dhp-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/monitor/oozie_app/copyDataToImpalaCluster.sh +++ b/dhp-workflows/dhp-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/monitor/oozie_app/copyDataToImpalaCluster.sh @@ -7,7 +7,7 @@ then fi #export HADOOP_USER_NAME="dimitris.pierrakos" -export HADOOP_USER_NAME=$5 +export HADOOP_USER_NAME=$2 function copydb() { db=$1 @@ -49,14 +49,8 @@ function copydb() { hdfs dfs -conf /etc/impala_cluster/hdfs-site.xml -rm -R /tmp/${db}.db } -STATS_DB=$1 -MONITOR_DB=$2 -OBSERVATORY_DB=$3 -EXT_DB=$4 -HADOOP_USER_NAME=$5 +MONITOR_DB=$1 +HADOOP_USER_NAME=$2 -copydb $EXT_DB -copydb $STATS_DB copydb $MONITOR_DB -copydb $OBSERVATORY_DB diff --git a/dhp-workflows/dhp-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/monitor/oozie_app/finalizeImpalaCluster.sh b/dhp-workflows/dhp-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/monitor/oozie_app/finalizeImpalaCluster.sh index fedfa00af..46eaba6d0 100644 --- a/dhp-workflows/dhp-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/monitor/oozie_app/finalizeImpalaCluster.sh +++ b/dhp-workflows/dhp-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/monitor/oozie_app/finalizeImpalaCluster.sh @@ -16,13 +16,7 @@ function createShadowDB() { impala-shell -i impala-cluster-dn1.openaire.eu -d ${SOURCE} -q "show tables" --delimited | sed "s/\(.*\)/create view ${SHADOW}.\1 as select * from ${SOURCE}.\1;/" | impala-shell -i impala-cluster-dn1.openaire.eu -f - } -STATS_DB=$1 -STATS_DB_SHADOW=$2 MONITOR_DB=$3 MONITOR_DB_SHADOW=$4 -OBSERVATORY_DB=$5 -OBSERVATORY_DB_SHADOW=$6 -createShadowDB $STATS_DB $STATS_DB_SHADOW createShadowDB $MONITOR_DB $MONITOR_DB_SHADOW -createShadowDB $OBSERVATORY_DB $OBSERVATORY_DB_SHADOW diff --git a/dhp-workflows/dhp-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/monitor/oozie_app/monitor-post.sh b/dhp-workflows/dhp-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/monitor/oozie_app/monitor-post.sh index dd82310e0..5863625a1 100644 --- a/dhp-workflows/dhp-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/monitor/oozie_app/monitor-post.sh +++ b/dhp-workflows/dhp-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/monitor/oozie_app/monitor-post.sh @@ -7,15 +7,13 @@ then fi export SOURCE=$1 -export TARGET=$2 -export SHADOW=$3 +export SHADOW=$2 +export HIVE_OPTS="-hiveconf mapred.job.queue.name=analytics -hiveconf hive.spark.client.connect.timeout=120000ms -hiveconf hive.spark.client.server.connect.timeout=300000ms -hiveconf spark.executor.memory=19166291558 -hiveconf spark.yarn.executor.memoryOverhead=3225 -hiveconf spark.driver.memory=11596411699 -hiveconf spark.yarn.driver.memoryOverhead=1228" +export HADOOP_USER_NAME="oozie" -impala-shell -q "invalidate metadata;" -#impala-shell -d ${TARGET} -q "show tables" --delimited | sed "s/\(.*\)/compute stats ${TARGET}.\1;/" | impala-shell -f - -#echo "Impala shell finished" -# -#echo "Updating shadow monitor database" -#impala-shell -q "create database if not exists ${SHADOW}" -#impala-shell -d ${SHADOW} -q "show tables" --delimited | sed "s/^/drop view if exists ${SHADOW}./" | sed "s/$/;/" | impala-shell -f - -#impala-shell -d ${TARGET} -q "show tables" --delimited | sed "s/\(.*\)/create view ${SHADOW}.\1 as select * from ${TARGET}.\1;/" | impala-shell -f - -#echo "Shadow db ready!" \ No newline at end of file +echo "Updating shadow database" +hive -e "drop database if exists ${SHADOW} cascade" +hive -e "create database if not exists ${SHADOW}" +hive $HIVE_OPTS --database ${SOURCE} -e "show tables" | grep -v WARN | sed "s/\(.*\)/create view ${SHADOW}.\1 as select * from ${SOURCE}.\1;/" > foo +hive -f foo +echo "Updated shadow database" \ No newline at end of file diff --git a/dhp-workflows/dhp-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/monitor/oozie_app/monitor.sh b/dhp-workflows/dhp-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/monitor/oozie_app/monitor.sh index f39bf4893..5e0f68586 100644 --- a/dhp-workflows/dhp-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/monitor/oozie_app/monitor.sh +++ b/dhp-workflows/dhp-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/monitor/oozie_app/monitor.sh @@ -8,8 +8,7 @@ fi export SOURCE=$1 export TARGET=$2 -export SHADOW=$3 -export SCRIPT_PATH=$4 +export SCRIPT_PATH=$3 export HIVE_OPTS="-hiveconf mapred.job.queue.name=analytics -hiveconf hive.spark.client.connect.timeout=120000ms -hiveconf hive.spark.client.server.connect.timeout=300000ms -hiveconf spark.executor.memory=19166291558 -hiveconf spark.yarn.executor.memoryOverhead=3225 -hiveconf spark.driver.memory=11596411699 -hiveconf spark.yarn.driver.memoryOverhead=1228" export HADOOP_USER_NAME="oozie" diff --git a/dhp-workflows/dhp-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/monitor/oozie_app/workflow.xml b/dhp-workflows/dhp-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/monitor/oozie_app/workflow.xml index ab51931b6..f24dcc700 100644 --- a/dhp-workflows/dhp-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/monitor/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/monitor/oozie_app/workflow.xml @@ -66,7 +66,6 @@ monitor.sh ${stats_db_name} ${monitor_db_name} - ${monitor_db_shadow_name} ${wf:appPath()}/scripts/createMonitorDB.sql monitor.sh @@ -85,9 +84,48 @@ ${monitor_db_shadow_name} monitor-post.sh - + + + + ${jobTracker} + ${nameNode} + copyDataToImpalaCluster.sh + + + ${monitor_db_name} + ${hadoop_user_name} + copyDataToImpalaCluster.sh + + + + + + + + ${jobTracker} + ${nameNode} + finalizeImpalaCluster.sh + ${monitor_db_name} + ${monitor_db_shadow_name} + finalizeImpalaCluster.sh + + + + + + + + ${jobTracker} + ${nameNode} + updateCache.sh + ${stats_tool_api_url} + updateCache.sh + + + + \ No newline at end of file