1
0
Fork 0

Add documentation about the problems with views and set the number of iterations back to 5.

This commit is contained in:
Lampros Smyrnaios 2024-02-21 16:52:28 +02:00
parent 4fd242155e
commit 0198362200
1 changed files with 10 additions and 4 deletions

View File

@ -44,12 +44,12 @@ function copydb() {
# Delete the old DB from Impala cluster.
# drop tables from db
for i in `impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} -d ${db} --delimited -q "show tables"`; do
`impala-shell -i ${IMPALA_HOSTNAME} -d ${db} -q "drop table ${i};"`;
`impala-shell -i ${IMPALA_HOSTNAME} -d ${db} -q "drop table ${i};"`;
done
# drop views from db
for i in `impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} -d ${db} --delimited -q "show tables"`; do
`impala-shell -i ${IMPALA_HOSTNAME} -d ${db} -q "drop view ${i};"`;
`impala-shell -i ${IMPALA_HOSTNAME} -d ${db} -q "drop view ${i};"`;
done
# delete the database
@ -69,9 +69,15 @@ function copydb() {
# The "2-times-loop" is there to retry creating the views for which their tables have not been created yet.
# Since there are many DBs handled in this script and many more may be added, we cannot easily keep track of the views of all those tables, so leave this extra loop for now.
for (( k = 1; k <= 2; k++ )); do
# Ideally, we should use a "do.. while" loop, but bash does not support it, so the plan is to use an infinite "while" loop with a "break" (see the TODO on the loop below).
should_retry=0 # Should retry creating the views (in case their tables were not created before them).
# There are views of other views as well, so we may have 3, 4 or 5 levels of nested views and need to retry.
# That's why there was a 5-times loop before, and it is added back again temporarily.
for (( k = 1; k <= 5; k++ )); do # TODO - To be replaced by a while-loop.
echo -e "\nCreate tables iteration_${k}\n"
for i in `hive -e "use $db; show tables;" | sed 's/WARN:.*//g'`; do
for i in `hive -e "use $db; show tables;" | sed 's/WARN:.*//g'`; do # This includes "views" as well..
# TODO - A view will not have a parquet-file with its name, and such a file would not help anyway. So, we need to find another way to know how the views are created.
CURRENT_PRQ_FILE=`hdfs dfs -conf /etc/impala_cluster/hdfs-site.xml -ls -C "/tmp/${TEMP_SUBDIR}/${db}.db/${i}/*.parq" | head -1`
if [[ -z "$CURRENT_PRQ_FILE" ]]; then
echo -e "The table \"${i}\" had no parquet files to get the schema from!\n"