Stats wf executed on hive only #283

Merged
claudio.atzori merged 45 commits from antonis.lempesis/dnet-hadoop:beta into beta 2023-05-02 14:05:13 +02:00
5 changed files with 53 additions and 30 deletions
Showing only changes of commit 90807b60c7 - Show all commits

View File

@ -7,7 +7,7 @@ then
fi fi
#export HADOOP_USER_NAME="dimitris.pierrakos" #export HADOOP_USER_NAME="dimitris.pierrakos"
export HADOOP_USER_NAME=$5 export HADOOP_USER_NAME=$2
function copydb() { function copydb() {
db=$1 db=$1
@ -49,14 +49,8 @@ function copydb() {
hdfs dfs -conf /etc/impala_cluster/hdfs-site.xml -rm -R /tmp/${db}.db hdfs dfs -conf /etc/impala_cluster/hdfs-site.xml -rm -R /tmp/${db}.db
} }
STATS_DB=$1 MONITOR_DB=$1
MONITOR_DB=$2 HADOOP_USER_NAME=$2
OBSERVATORY_DB=$3
EXT_DB=$4
HADOOP_USER_NAME=$5
copydb $EXT_DB
copydb $STATS_DB
copydb $MONITOR_DB copydb $MONITOR_DB
copydb $OBSERVATORY_DB

View File

@ -16,13 +16,7 @@ function createShadowDB() {
impala-shell -i impala-cluster-dn1.openaire.eu -d ${SOURCE} -q "show tables" --delimited | sed "s/\(.*\)/create view ${SHADOW}.\1 as select * from ${SOURCE}.\1;/" | impala-shell -i impala-cluster-dn1.openaire.eu -f - impala-shell -i impala-cluster-dn1.openaire.eu -d ${SOURCE} -q "show tables" --delimited | sed "s/\(.*\)/create view ${SHADOW}.\1 as select * from ${SOURCE}.\1;/" | impala-shell -i impala-cluster-dn1.openaire.eu -f -
} }
STATS_DB=$1
STATS_DB_SHADOW=$2
MONITOR_DB=$3 MONITOR_DB=$3
MONITOR_DB_SHADOW=$4 MONITOR_DB_SHADOW=$4
OBSERVATORY_DB=$5
OBSERVATORY_DB_SHADOW=$6
createShadowDB $STATS_DB $STATS_DB_SHADOW
createShadowDB $MONITOR_DB $MONITOR_DB_SHADOW createShadowDB $MONITOR_DB $MONITOR_DB_SHADOW
createShadowDB $OBSERVATORY_DB $OBSERVATORY_DB_SHADOW

View File

@ -7,15 +7,13 @@ then
fi fi
export SOURCE=$1 export SOURCE=$1
export TARGET=$2 export SHADOW=$2
export SHADOW=$3 export HIVE_OPTS="-hiveconf mapred.job.queue.name=analytics -hiveconf hive.spark.client.connect.timeout=120000ms -hiveconf hive.spark.client.server.connect.timeout=300000ms -hiveconf spark.executor.memory=19166291558 -hiveconf spark.yarn.executor.memoryOverhead=3225 -hiveconf spark.driver.memory=11596411699 -hiveconf spark.yarn.driver.memoryOverhead=1228"
export HADOOP_USER_NAME="oozie"
impala-shell -q "invalidate metadata;" echo "Updating shadow database"
#impala-shell -d ${TARGET} -q "show tables" --delimited | sed "s/\(.*\)/compute stats ${TARGET}.\1;/" | impala-shell -f - hive -e "drop database if exists ${SHADOW} cascade"
#echo "Impala shell finished" hive -e "create database if not exists ${SHADOW}"
# hive $HIVE_OPTS --database ${SOURCE} -e "show tables" | grep -v WARN | sed "s/\(.*\)/create view ${SHADOW}.\1 as select * from ${SOURCE}.\1;/" > foo
#echo "Updating shadow monitor database" hive -f foo
#impala-shell -q "create database if not exists ${SHADOW}" echo "Updated shadow database"
#impala-shell -d ${SHADOW} -q "show tables" --delimited | sed "s/^/drop view if exists ${SHADOW}./" | sed "s/$/;/" | impala-shell -f -
#impala-shell -d ${TARGET} -q "show tables" --delimited | sed "s/\(.*\)/create view ${SHADOW}.\1 as select * from ${TARGET}.\1;/" | impala-shell -f -
#echo "Shadow db ready!"

View File

@ -8,8 +8,7 @@ fi
export SOURCE=$1 export SOURCE=$1
export TARGET=$2 export TARGET=$2
export SHADOW=$3 export SCRIPT_PATH=$3
export SCRIPT_PATH=$4
export HIVE_OPTS="-hiveconf mapred.job.queue.name=analytics -hiveconf hive.spark.client.connect.timeout=120000ms -hiveconf hive.spark.client.server.connect.timeout=300000ms -hiveconf spark.executor.memory=19166291558 -hiveconf spark.yarn.executor.memoryOverhead=3225 -hiveconf spark.driver.memory=11596411699 -hiveconf spark.yarn.driver.memoryOverhead=1228" export HIVE_OPTS="-hiveconf mapred.job.queue.name=analytics -hiveconf hive.spark.client.connect.timeout=120000ms -hiveconf hive.spark.client.server.connect.timeout=300000ms -hiveconf spark.executor.memory=19166291558 -hiveconf spark.yarn.executor.memoryOverhead=3225 -hiveconf spark.driver.memory=11596411699 -hiveconf spark.yarn.driver.memoryOverhead=1228"
export HADOOP_USER_NAME="oozie" export HADOOP_USER_NAME="oozie"

View File

@ -66,7 +66,6 @@
<exec>monitor.sh</exec> <exec>monitor.sh</exec>
<argument>${stats_db_name}</argument> <argument>${stats_db_name}</argument>
<argument>${monitor_db_name}</argument> <argument>${monitor_db_name}</argument>
<argument>${monitor_db_shadow_name}</argument>
<argument>${wf:appPath()}/scripts/createMonitorDB.sql</argument> <argument>${wf:appPath()}/scripts/createMonitorDB.sql</argument>
<file>monitor.sh</file> <file>monitor.sh</file>
</shell> </shell>
@ -85,9 +84,48 @@
<argument>${monitor_db_shadow_name}</argument> <argument>${monitor_db_shadow_name}</argument>
<file>monitor-post.sh</file> <file>monitor-post.sh</file>
</shell> </shell>
<ok to="End"/> <ok to="Step3-copyDataToImpalaCluster"/>
<error to="Kill"/> <error to="Kill"/>
</action> </action>
<!-- Step3: shell action that runs copyDataToImpalaCluster.sh.
     Arguments, in order: the monitor DB name and the hadoop user name.
     On the new side of the copyDataToImpalaCluster.sh hunk in this change,
     the script reads $1 as MONITOR_DB, $2 as HADOOP_USER_NAME, and then
     calls copydb on MONITOR_DB only.
     Transitions: ok goes to Step4-finalizeImpalaCluster, error goes to Kill. -->
<action name="Step3-copyDataToImpalaCluster">
<shell xmlns="uri:oozie:shell-action:0.1">
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<exec>copyDataToImpalaCluster.sh</exec>
<!-- <env-var>HADOOP_USER_NAME=${wf:user()}</env-var>-->
<!-- <argument>${external_stats_db_name}</argument>-->
<argument>${monitor_db_name}</argument>
<argument>${hadoop_user_name}</argument>
<file>copyDataToImpalaCluster.sh</file>
</shell>
<ok to="Step4-finalizeImpalaCluster"/>
<error to="Kill"/>
</action>
<!-- Step4: shell action that runs finalizeImpalaCluster.sh.
     Arguments, in order: the monitor DB name and the monitor shadow DB name.
     NOTE(review): only two arguments are passed here, but the hunk that
     contains createShadowDB (presumably finalizeImpalaCluster.sh) still
     reads MONITOR_DB from $3 and MONITOR_DB_SHADOW from $4 on its new side.
     Confirm that the positional parameters in the script match this call.
     Transitions: ok goes to Step5-updateCache, error goes to Kill. -->
<action name="Step4-finalizeImpalaCluster">
<shell xmlns="uri:oozie:shell-action:0.1">
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<exec>finalizeImpalaCluster.sh</exec>
<argument>${monitor_db_name}</argument>
<argument>${monitor_db_shadow_name}</argument>
<file>finalizeImpalaCluster.sh</file>
</shell>
<ok to="Step5-updateCache"/>
<error to="Kill"/>
</action>
<!-- Step5: shell action that runs updateCache.sh with a single argument,
     the stats tool API URL. What the script does with that URL is not
     visible in this change.
     Transitions: ok goes to End, error goes to Kill. -->
<action name="Step5-updateCache">
<shell xmlns="uri:oozie:shell-action:0.1">
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<exec>updateCache.sh</exec>
<argument>${stats_tool_api_url}</argument>
<file>updateCache.sh</file>
</shell>
<ok to="End"/>
<error to="Kill"/>
</action>
<end name="End"/> <end name="End"/>
</workflow-app> </workflow-app>