moving data to impala cluster and creating shadow databases there

This commit is contained in:
Antonis Lempesis 2021-12-13 16:26:14 +02:00 committed by dimitrispie
parent 778a1a724f
commit 2754c3dd62
4 changed files with 37 additions and 27 deletions

View File

@ -11,10 +11,4 @@ export SHADOW=$2
echo "Updating shadow database" echo "Updating shadow database"
hive --database ${SOURCE} -e "show tables" | grep -v WARN | sed "s/^\(.*\)/analyze table ${SOURCE}.\1 compute statistics;/" > foo hive --database ${SOURCE} -e "show tables" | grep -v WARN | sed "s/^\(.*\)/analyze table ${SOURCE}.\1 compute statistics;/" > foo
hive -f foo hive -f foo
hive -e "create database if not exists ${SHADOW}"
hive --database ${SHADOW} -e "show tables" | grep -v WARN | sed "s/^/drop view if exists ${SHADOW}./" | sed "s/$/;/" > foo
hive -f foo
hive --database ${SOURCE} -e "show tables" | grep -v WARN | sed "s/\(.*\)/create view ${SHADOW}.\1 as select * from ${SOURCE}.\1;/" > foo
hive -f foo
echo "Shadow db ready!"

View File

@ -17,12 +17,4 @@ hdfs dfs -copyToLocal $4
echo "Creating monitor database" echo "Creating monitor database"
cat step20-createMonitorDB.sql | sed s/SOURCE/$1/g | sed s/TARGET/$2/g1 > foo cat step20-createMonitorDB.sql | sed s/SOURCE/$1/g | sed s/TARGET/$2/g1 > foo
hive -f foo hive -f foo
echo "Impala shell finished" echo "Impala shell finished"
echo "Updating shadow monitor database"
hive -e "create database if not exists ${SHADOW}"
hive --database ${SHADOW} -e "show tables" | grep -v WARN | sed "s/^/drop view if exists ${SHADOW}./" | sed "s/$/;/" > foo
hive -f foo
hive --database ${TARGET} -e "show tables" | grep -v WARN | sed "s/\(.*\)/create view ${SHADOW}.\1 as select * from ${TARGET}.\1;/" > foo
hive -f foo
echo "Shadow db ready!"

View File

@ -12,12 +12,4 @@ export SHADOW=$3
hive --database ${TARGET} -e "show tables" | grep -v WARN | sed "s/\(.*\)/analyze table ${TARGET}.\1 compute statistics;/" > foo hive --database ${TARGET} -e "show tables" | grep -v WARN | sed "s/\(.*\)/analyze table ${TARGET}.\1 compute statistics;/" > foo
hive -f foo hive -f foo
echo "Impala shell finished" echo "Impala shell finished"
echo "Updating shadow observatory database"
hive -e "create database if not exists ${SHADOW}"
hive --database ${SHADOW} -e "show tables" | grep -v WARN | sed "s/^/drop view if exists ${SHADOW}./" | sed "s/$/;/" > foo
hive -f foo
hive -d ${TARGET} -e "show tables" | grep -v WARN | sed "s/\(.*\)/create view ${SHADOW}.\1 as select * from ${TARGET}.\1;/" > foo
hive -f foo
echo "Shadow db ready!"

View File

@ -365,11 +365,43 @@
<argument>${observatory_db_shadow_name}</argument> <argument>${observatory_db_shadow_name}</argument>
<file>observatory-post.sh</file> <file>observatory-post.sh</file>
</shell> </shell>
<ok to="Step22"/> <ok to="step22-copyDataToImpalaCluster"/>
<error to="Kill"/> <error to="Kill"/>
</action> </action>
<action name="Step22"> <action name="step22-copyDataToImpalaCluster">
<shell xmlns="uri:oozie:shell-action:0.1">
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<exec>copyDataToImpalaCluster.sh</exec>
<argument>${external_stats_db_name}</argument>
<argument>${stats_db_name}</argument>
<argument>${monitor_db_name}</argument>
<argument>${observatory_db_name}</argument>
<file>copyDataToImpalaCluster.sh</file>
</shell>
<ok to="step23-finalizeImpalaCluster"/>
<error to="Kill"/>
</action>
<action name="step23-finalizeImpalaCluster">
<shell xmlns="uri:oozie:shell-action:0.1">
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<exec>finalizeImpalaCluster.sh</exec>
<argument>${stats_db_name}</argument>
<argument>${stats_db_shadow_name}</argument>
<argument>${monitor_db_name}</argument>
<argument>${monitor_db_shadow_name}</argument>
<argument>${observatory_db_name}</argument>
<argument>${observatory_db_shadow_name}</argument>
<file>finalizeImpalaCluster.sh</file>
</shell>
<ok to="Step24-updateCache"/>
<error to="Kill"/>
</action>
<action name="Step24-updateCache">
<shell xmlns="uri:oozie:shell-action:0.1"> <shell xmlns="uri:oozie:shell-action:0.1">
<job-tracker>${jobTracker}</job-tracker> <job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node> <name-node>${nameNode}</name-node>