updates on stats-update workflow #100

Merged
claudio.atzori merged 8 commits from :master into master 2021-04-02 16:13:36 +02:00
3 changed files with 32 additions and 15 deletions
Showing only changes of commit 3c75a05044 - Show all commits

View File

@ -1,8 +0,0 @@
------------------------------------------------------
------------------------------------------------------
-- Impala metadata refresh - needed to make the tables
-- visible to Impala.
-- ${stats_db_name} is a placeholder; presumably it is
-- substituted by the calling workflow before this
-- statement is executed (TODO confirm).
-- NOTE(review): INVALIDATE METADATA reloads metadata,
-- it does not compute table statistics. Impala documents
-- its argument as [db_name.]table_name - confirm that a
-- bare database name behaves as intended here.
------------------------------------------------------
------------------------------------------------------
INVALIDATE METADATA ${stats_db_name};

View File

@ -0,0 +1,18 @@
# Refreshes Impala metadata and table statistics for a source database, then
# rebuilds a "shadow" database that exposes every source table as a view.
#
# Arguments:
#   $1 - SOURCE: database whose tables are analysed and mirrored
#   $2 - SHADOW: database (created if missing) that receives one view per
#        SOURCE table; everything currently listed in it is dropped first
# Outputs: progress messages on stdout, usage errors on stderr.
# Exit status: 1 when an argument is missing or empty.

# Fail early: with an empty name the statements below would be malformed
# (e.g. "create database if not exists ").
if [ -z "${1:-}" ] || [ -z "${2:-}" ]; then
  echo "Usage: $0 <source_db> <shadow_db>" >&2
  exit 1
fi

current_user=$(whoami)
export PYTHON_EGG_CACHE="/home/${current_user}/.python-eggs"
export link_folder="/tmp/impala-shell-python-egg-cache-${current_user}"

# Replace whatever sits at $link_folder with a symlink pointing below
# PYTHON_EGG_CACHE, unless it already is a symlink.
# NOTE(review): presumably impala-shell unpacks its Python eggs under
# $link_folder and this redirects them into the home directory - confirm.
if ! [ -L "${link_folder}" ]; then
  rm -Rf "${link_folder}"
  ln -sfn "${PYTHON_EGG_CACHE}${link_folder}" "${link_folder}"
fi

export SOURCE=$1
export SHADOW=$2

# Prints the names returned by "show tables" for database $1, one per line.
list_tables() {
  impala-shell -d "$1" -q "show tables" --delimited
}

echo "Updating shadow database"
impala-shell -d "${SOURCE}" -q "invalidate metadata"

# Statements are generated with printf instead of sed so that the database
# names are never interpreted as part of a sed expression. Blank lines are
# skipped so that no statement with an empty table name is produced.
# "impala-shell -c -f -" runs the generated statements from stdin and keeps
# going when a single statement fails.
list_tables "${SOURCE}" |
  while IFS= read -r table || [ -n "${table}" ]; do
    [ -n "${table}" ] || continue
    printf 'compute stats %s.%s;\n' "${SOURCE}" "${table}"
  done |
  impala-shell -c -f -

impala-shell -q "create database if not exists ${SHADOW}"

# Drop every view currently listed in the shadow database ...
list_tables "${SHADOW}" |
  while IFS= read -r table || [ -n "${table}" ]; do
    [ -n "${table}" ] || continue
    printf 'drop view if exists %s.%s;\n' "${SHADOW}" "${table}"
  done |
  impala-shell -c -f -

# ... and recreate one view per table of the source database.
list_tables "${SOURCE}" |
  while IFS= read -r table || [ -n "${table}" ]; do
    [ -n "${table}" ] || continue
    printf 'create view %s.%s as select * from %s.%s;\n' \
      "${SHADOW}" "${table}" "${SOURCE}" "${table}"
  done |
  impala-shell -c -f -

echo "Shadow db ready!"

View File

@ -1,4 +1,10 @@
#!/usr/bin/env bash export PYTHON_EGG_CACHE=/home/$(whoami)/.python-eggs
export link_folder=/tmp/impala-shell-python-egg-cache-$(whoami)
if ! [ -L $link_folder ]
then
rm -Rf "$link_folder"
ln -sfn ${PYTHON_EGG_CACHE}${link_folder} ${link_folder}
fi
CONTEXT_API=$1 CONTEXT_API=$1
TARGET_DB=$2 TARGET_DB=$2
@ -20,12 +26,13 @@ hdfs dfs -copyFromLocal concepts.csv ${TMP}
hdfs dfs -chmod -R 777 ${TMP} hdfs dfs -chmod -R 777 ${TMP}
echo "Creating and populating impala tables" echo "Creating and populating impala tables"
impala-shell -c "create table ${TARGET_DB}.context (id string, name string) row format delimited fields terminated by ',';" impala-shell -q "create table ${TARGET_DB}.context (id string, name string) row format delimited fields terminated by ','"
impala-shell -c "create table ${TARGET_DB}.category (context string, id string, name string) row format delimited fields terminated by ',';" impala-shell -q "create table ${TARGET_DB}.category (context string, id string, name string) row format delimited fields terminated by ','"
impala-shell -c "create table ${TARGET_DB}.concept (category string, id string, name string) row format delimited fields terminated by ',';" impala-shell -q "create table ${TARGET_DB}.concept (category string, id string, name string) row format delimited fields terminated by ','"
impala-shell -c "load data inpath '${TMP}/contexts.csv' into table ${TARGET_DB}.context;" impala-shell -d ${TARGET_DB} -q "invalidate metadata"
impala-shell -c "load data inpath '${TMP}/categories.csv' into table ${TARGET_DB}.category;" impala-shell -q "load data inpath '${TMP}/contexts.csv' into table ${TARGET_DB}.context"
impala-shell -c "load data inpath '${TMP}/concepts.csv' into table ${TARGET_DB}.concept;" impala-shell -q "load data inpath '${TMP}/categories.csv' into table ${TARGET_DB}.category"
impala-shell -q "load data inpath '${TMP}/concepts.csv' into table ${TARGET_DB}.concept"
echo "Cleaning up" echo "Cleaning up"
hdfs dfs -rm -f -r -skipTrash ${TMP} hdfs dfs -rm -f -r -skipTrash ${TMP}