forked from D-Net/dnet-hadoop
Add documentation about the problems with views and set the number of iterations back to 5.
This commit is contained in:
parent
4fd242155e
commit
0198362200
|
@ -44,12 +44,12 @@ function copydb() {
|
||||||
# Delete the old DB from Impala cluster.
|
# Delete the old DB from Impala cluster.
|
||||||
# drop tables from db
|
# drop tables from db
|
||||||
for i in `impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} -d ${db} --delimited -q "show tables"`; do
|
for i in `impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} -d ${db} --delimited -q "show tables"`; do
|
||||||
`impala-shell -i ${IMPALA_HOSTNAME} -d ${db} -q "drop table ${i};"`;
|
`impala-shell -i ${IMPALA_HOSTNAME} -d ${db} -q "drop table ${i};"`;
|
||||||
done
|
done
|
||||||
|
|
||||||
# drop views from db
|
# drop views from db
|
||||||
for i in `impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} -d ${db} --delimited -q "show tables"`; do
|
for i in `impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} -d ${db} --delimited -q "show tables"`; do
|
||||||
`impala-shell -i ${IMPALA_HOSTNAME} -d ${db} -q "drop view ${i};"`;
|
`impala-shell -i ${IMPALA_HOSTNAME} -d ${db} -q "drop view ${i};"`;
|
||||||
done
|
done
|
||||||
|
|
||||||
# delete the database
|
# delete the database
|
||||||
|
@ -69,9 +69,15 @@ function copydb() {
|
||||||
# The "2-times-loop" is there to retry creating the views for which their tables have not been created yet.
|
# The retry loop below is there to retry creating the views whose tables have not been created yet.
|
||||||
# Since there are many DBs handled in this script and many more may be added, we cannot easily keep track of the views of all those tables, so leave this extra loop for now.
|
# Since there are many DBs handled in this script and many more may be added, we cannot easily keep track of the views of all those tables, so leave this extra loop for now.
|
||||||
|
|
||||||
for (( k = 1; k <= 2; k++ )); do
|
# Ideally, we would use a "do..while" loop, but bash does not support it, so the plan is to use an infinite "while" loop with a "break". Until then, a fixed-count "for" loop is used.
|
||||||
|
should_retry=0 # Should retry creating the views (in case their tables were not created before them).
|
||||||
|
# There are views of other views as well, so views may be nested 3, 4 or 5 levels deep and their creation may need to be retried.
|
||||||
|
# That's why there was a 5-times loop before, and why it is added back temporarily.
|
||||||
|
|
||||||
|
for (( k = 1; k <= 5; k++ )); do # TODO - To be replaced by a while-loop.
|
||||||
echo -e "\nCreate tables iteration_${k}\n"
|
echo -e "\nCreate tables iteration_${k}\n"
|
||||||
for i in `hive -e "use $db; show tables;" | sed 's/WARN:.*//g'`; do
|
for i in `hive -e "use $db; show tables;" | sed 's/WARN:.*//g'`; do # This includes "views" as well..
|
||||||
|
# TODO - A view does not have a parquet file under its name, and such a file would not help anyway. So, we need to find another way to know how the views are created.
|
||||||
CURRENT_PRQ_FILE=`hdfs dfs -conf /etc/impala_cluster/hdfs-site.xml -ls -C "/tmp/${TEMP_SUBDIR}/${db}.db/${i}/*.parq" | head -1`
|
CURRENT_PRQ_FILE=`hdfs dfs -conf /etc/impala_cluster/hdfs-site.xml -ls -C "/tmp/${TEMP_SUBDIR}/${db}.db/${i}/*.parq" | head -1`
|
||||||
if [[ -z "$CURRENT_PRQ_FILE" ]]; then
|
if [[ -z "$CURRENT_PRQ_FILE" ]]; then
|
||||||
echo -e "The table \"${i}\" had no parquet files to get the schema from!\n"
|
echo -e "The table \"${i}\" had no parquet files to get the schema from!\n"
|
||||||
|
|
Loading…
Reference in New Issue