Stats wf executed on hive only #283
|
@ -7,7 +7,7 @@ then
|
|||
fi
|
||||
|
||||
#export HADOOP_USER_NAME="dimitris.pierrakos"
|
||||
export HADOOP_USER_NAME=$5
|
||||
export HADOOP_USER_NAME=$2
|
||||
|
||||
function copydb() {
|
||||
db=$1
|
||||
|
@ -49,14 +49,8 @@ function copydb() {
|
|||
hdfs dfs -conf /etc/impala_cluster/hdfs-site.xml -rm -R /tmp/${db}.db
|
||||
}
|
||||
|
||||
STATS_DB=$1
|
||||
MONITOR_DB=$2
|
||||
OBSERVATORY_DB=$3
|
||||
EXT_DB=$4
|
||||
HADOOP_USER_NAME=$5
|
||||
MONITOR_DB=$1
|
||||
HADOOP_USER_NAME=$2
|
||||
|
||||
copydb $EXT_DB
|
||||
copydb $STATS_DB
|
||||
copydb $MONITOR_DB
|
||||
copydb $OBSERVATORY_DB
|
||||
|
||||
|
|
|
@ -16,13 +16,7 @@ function createShadowDB() {
|
|||
impala-shell -i impala-cluster-dn1.openaire.eu -d ${SOURCE} -q "show tables" --delimited | sed "s/\(.*\)/create view ${SHADOW}.\1 as select * from ${SOURCE}.\1;/" | impala-shell -i impala-cluster-dn1.openaire.eu -f -
|
||||
}
|
||||
|
||||
STATS_DB=$1
|
||||
STATS_DB_SHADOW=$2
|
||||
MONITOR_DB=$3
|
||||
MONITOR_DB_SHADOW=$4
|
||||
OBSERVATORY_DB=$5
|
||||
OBSERVATORY_DB_SHADOW=$6
|
||||
|
||||
createShadowDB $STATS_DB $STATS_DB_SHADOW
|
||||
createShadowDB $MONITOR_DB $MONITOR_DB_SHADOW
|
||||
createShadowDB $OBSERVATORY_DB $OBSERVATORY_DB_SHADOW
|
||||
|
|
|
@ -7,15 +7,13 @@ then
|
|||
fi
|
||||
|
||||
export SOURCE=$1
|
||||
export TARGET=$2
|
||||
export SHADOW=$3
|
||||
export SHADOW=$2
|
||||
export HIVE_OPTS="-hiveconf mapred.job.queue.name=analytics -hiveconf hive.spark.client.connect.timeout=120000ms -hiveconf hive.spark.client.server.connect.timeout=300000ms -hiveconf spark.executor.memory=19166291558 -hiveconf spark.yarn.executor.memoryOverhead=3225 -hiveconf spark.driver.memory=11596411699 -hiveconf spark.yarn.driver.memoryOverhead=1228"
|
||||
export HADOOP_USER_NAME="oozie"
|
||||
|
||||
impala-shell -q "invalidate metadata;"
|
||||
#impala-shell -d ${TARGET} -q "show tables" --delimited | sed "s/\(.*\)/compute stats ${TARGET}.\1;/" | impala-shell -f -
|
||||
#echo "Impala shell finished"
|
||||
#
|
||||
#echo "Updating shadow monitor database"
|
||||
#impala-shell -q "create database if not exists ${SHADOW}"
|
||||
#impala-shell -d ${SHADOW} -q "show tables" --delimited | sed "s/^/drop view if exists ${SHADOW}./" | sed "s/$/;/" | impala-shell -f -
|
||||
#impala-shell -d ${TARGET} -q "show tables" --delimited | sed "s/\(.*\)/create view ${SHADOW}.\1 as select * from ${TARGET}.\1;/" | impala-shell -f -
|
||||
#echo "Shadow db ready!"
|
||||
echo "Updating shadow database"
|
||||
hive -e "drop database if exists ${SHADOW} cascade"
|
||||
hive -e "create database if not exists ${SHADOW}"
|
||||
hive $HIVE_OPTS --database ${SOURCE} -e "show tables" | grep -v WARN | sed "s/\(.*\)/create view ${SHADOW}.\1 as select * from ${SOURCE}.\1;/" > foo
|
||||
hive -f foo
|
||||
echo "Updated shadow database"
|
|
@ -8,8 +8,7 @@ fi
|
|||
|
||||
export SOURCE=$1
|
||||
export TARGET=$2
|
||||
export SHADOW=$3
|
||||
export SCRIPT_PATH=$4
|
||||
export SCRIPT_PATH=$3
|
||||
|
||||
export HIVE_OPTS="-hiveconf mapred.job.queue.name=analytics -hiveconf hive.spark.client.connect.timeout=120000ms -hiveconf hive.spark.client.server.connect.timeout=300000ms -hiveconf spark.executor.memory=19166291558 -hiveconf spark.yarn.executor.memoryOverhead=3225 -hiveconf spark.driver.memory=11596411699 -hiveconf spark.yarn.driver.memoryOverhead=1228"
|
||||
export HADOOP_USER_NAME="oozie"
|
||||
|
|
|
@ -66,7 +66,6 @@
|
|||
<exec>monitor.sh</exec>
|
||||
<argument>${stats_db_name}</argument>
|
||||
<argument>${monitor_db_name}</argument>
|
||||
<argument>${monitor_db_shadow_name}</argument>
|
||||
<argument>${wf:appPath()}/scripts/createMonitorDB.sql</argument>
|
||||
<file>monitor.sh</file>
|
||||
</shell>
|
||||
|
@ -85,9 +84,48 @@
|
|||
<argument>${monitor_db_shadow_name}</argument>
|
||||
<file>monitor-post.sh</file>
|
||||
</shell>
|
||||
<ok to="End"/>
|
||||
<ok to="Step3-copyDataToImpalaCluster"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="Step3-copyDataToImpalaCluster">
|
||||
<shell xmlns="uri:oozie:shell-action:0.1">
|
||||
<job-tracker>${jobTracker}</job-tracker>
|
||||
<name-node>${nameNode}</name-node>
|
||||
<exec>copyDataToImpalaCluster.sh</exec>
|
||||
<!-- <env-var>HADOOP_USER_NAME=${wf:user()}</env-var>-->
|
||||
<!-- <argument>${external_stats_db_name}</argument>-->
|
||||
<argument>${monitor_db_name}</argument>
|
||||
<argument>${hadoop_user_name}</argument>
|
||||
<file>copyDataToImpalaCluster.sh</file>
|
||||
</shell>
|
||||
<ok to="Step4-finalizeImpalaCluster"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="Step4-finalizeImpalaCluster">
|
||||
<shell xmlns="uri:oozie:shell-action:0.1">
|
||||
<job-tracker>${jobTracker}</job-tracker>
|
||||
<name-node>${nameNode}</name-node>
|
||||
<exec>finalizeImpalaCluster.sh</exec>
|
||||
<argument>${monitor_db_name}</argument>
|
||||
<argument>${monitor_db_shadow_name}</argument>
|
||||
<file>finalizeImpalaCluster.sh</file>
|
||||
</shell>
|
||||
<ok to="Step5-updateCache"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="Step5-updateCache">
|
||||
<shell xmlns="uri:oozie:shell-action:0.1">
|
||||
<job-tracker>${jobTracker}</job-tracker>
|
||||
<name-node>${nameNode}</name-node>
|
||||
<exec>updateCache.sh</exec>
|
||||
<argument>${stats_tool_api_url}</argument>
|
||||
<file>updateCache.sh</file>
|
||||
</shell>
|
||||
<ok to="End"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
<end name="End"/>
|
||||
</workflow-app>
|
Loading…
Reference in New Issue