diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/copyDataToImpalaCluster.sh b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/copyDataToImpalaCluster.sh
index 989eeae84..043780c07 100644
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/copyDataToImpalaCluster.sh
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/copyDataToImpalaCluster.sh
@@ -44,12 +44,12 @@ function copydb() {
     # Delete the old DB from Impala cluster.
     # drop tables from db
     for i in `impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} -d ${db} --delimited -q "show tables"`; do
-        `impala-shell -i ${IMPALA_HOSTNAME} -d ${db} -q "drop table ${i};"`;
+        `impala-shell -i ${IMPALA_HOSTNAME} -d ${db} -q "drop table ${i};"`;
     done

     # drop views from db
     for i in `impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} -d ${db} --delimited -q "show tables"`; do
-        `impala-shell -i ${IMPALA_HOSTNAME} -d ${db} -q "drop view ${i};"`;
+        `impala-shell -i ${IMPALA_HOSTNAME} -d ${db} -q "drop view ${i};"`;
     done

     # delete the database
@@ -69,9 +69,15 @@ function copydb() {

     # The "2-times-loop" is there to retry creating the views for which their tables have not been created yet.
     # Since there are many DBs handled in this script and many more may be added, we cannot easily keep track of the views of all those tables, so leave this extra loop for now.
-    for (( k = 1; k <= 2; k++ )); do
+    # Ideally, we should use a "do.. while" loop, but bash does not support it, so we use an infinite "while" loop with a "break".
+    should_retry=0 # Should retry creating the views (in case their tables where not created before them).
+    # There are views of other views as well, so we may have 3,4,5 nested-view and need to retry..
+    # That's why there was a 5-times loop before.. and is added again temporarily..
+
+    for (( k = 1; k <= 5; k++ )); do # TODO - To be replaced by a while-loop.
         echo -e "\nCreate tables iteration_${k}\n"
-        for i in `hive -e "use $db; show tables;" | sed 's/WARN:.*//g'`; do
+        for i in `hive -e "use $db; show tables;" | sed 's/WARN:.*//g'`; do # This includes "views" as well..
+            # TODO - A view will not have a parquet-file with its name and it would not help anyway. So, we need to find another way to know how the views are created..
             CURRENT_PRQ_FILE=`hdfs dfs -conf /etc/impala_cluster/hdfs-site.xml -ls -C "/tmp/${TEMP_SUBDIR}/${db}.db/${i}/*.parq" | head -1`
             if [[ -z "$CURRENT_PRQ_FILE" ]]; then
                 echo -e "The table \"${i}\" had no parquet files to get the schema from!\n"
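
For reference, the "do.. while"-style retry loop hinted at in the added comments could look roughly like the sketch below. It is only an illustration of the pattern the comments describe (bash has no native do..while, so an infinite while-loop with a break is used), not part of this patch: the helper recreate_views_once is a hypothetical placeholder, since the patch itself still marks the view-detection step as a TODO.

    should_retry=1
    while true; do
        should_retry=0
        # Hypothetical helper: attempts to create every remaining view once and
        # returns non-zero if at least one failed because its base table or
        # parent view does not exist yet.
        recreate_views_once || should_retry=1
        if (( should_retry == 0 )); then
            break  # all views were created, stop retrying
        fi
    done

Such a loop would keep retrying until no view creation fails, instead of relying on a fixed number of iterations (2 before, 5 with this patch) to cover arbitrarily deep chains of views built on other views.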