From c858c0211185f3c096ffad18b5afbfa7fd464c92 Mon Sep 17 00:00:00 2001
From: LSmyrnaios <lsmyrnaios@gmail.com>
Date: Thu, 20 Jun 2024 14:33:46 +0300
Subject: [PATCH] - Fix not using the "export HADOOP_USER_NAME" statement in
 "createPDFsAggregated.sh", which caused permission-issues when creating
 tables with Impala. - Remove unused "--user" parameter in "impala-shell"
 calls. - Code polishing.

---
 .../oozie_app/copyDataToImpalaCluster.sh      | 14 +++++-----
 .../oozie_app/copyDataToImpalaCluster.sh      | 14 +++++-----
 .../oozie_app/copyDataToImpalaCluster.sh      | 14 +++++-----
 .../oozie_app/copyDataToImpalaCluster.sh      | 19 ++++++-------
 .../stats/oozie_app/createPDFsAggregated.sh   | 28 +++++++++++--------
 5 files changed, 46 insertions(+), 43 deletions(-)
diff --git a/dhp-workflows/dhp-stats-hist-snaps/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-hist-snaps/oozie_app/copyDataToImpalaCluster.sh b/dhp-workflows/dhp-stats-hist-snaps/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-hist-snaps/oozie_app/copyDataToImpalaCluster.sh
index 978cf4a9a..09ea1b393 100644
--- a/dhp-workflows/dhp-stats-hist-snaps/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-hist-snaps/oozie_app/copyDataToImpalaCluster.sh
+++ b/dhp-workflows/dhp-stats-hist-snaps/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-hist-snaps/oozie_app/copyDataToImpalaCluster.sh
@@ -63,7 +63,7 @@ function copydb() {
   start_db_time=$(date +%s)
 
   # Delete the old DB from Impala cluster (if exists).
-  impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} -q "drop database if exists ${db} cascade" |& tee error.log # impala-shell prints all logs in stderr, so wee need to capture them and put them in a file, in order to perform "grep" on them later
+  impala-shell -i ${IMPALA_HOSTNAME} -q "drop database if exists ${db} cascade" |& tee error.log # impala-shell prints all logs in stderr, so wee need to capture them and put them in a file, in order to perform "grep" on them later
   log_errors=`cat error.log | grep -E "WARN|ERROR|FAILED"`
   if [ -n "$log_errors" ]; then
     echo -e "\n\nERROR: THERE WAS A PROBLEM WHEN DROPPING THE OLD DATABASE FROM IMPALA CLUSTER! EXITING...\n\n"
@@ -120,7 +120,7 @@ function copydb() {
   start_create_schema_time=$(date +%s)
 
   # create the new database (with the same name)
-  impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} -q "create database ${db}"
+  impala-shell -i ${IMPALA_HOSTNAME} -q "create database ${db}"
 
   # Because "Hive" and "Impala" do not have compatible schemas, we cannot use the "show create table <name>" output from hive to create the exact same table in impala.
   # So, we have to find at least one parquet file (check if it's there) from the table in the ocean cluster for impala to use it to extract the table-schema itself from that file.
@@ -148,7 +148,7 @@ function copydb() {
             exit 5
           fi  # This error is not FATAL, do we do not return from this function, in normal circumstances.
       else
-        impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} -q "create table ${db}.${i} like parquet '${CURRENT_PRQ_FILE}' stored as parquet;" |& tee error.log
+        impala-shell -i ${IMPALA_HOSTNAME} -q "create table ${db}.${i} like parquet '${CURRENT_PRQ_FILE}' stored as parquet;" |& tee error.log
         log_errors=`cat error.log | grep -E "WARN|ERROR|FAILED"`
         if [ -n "$log_errors" ]; then
           echo -e "\n\nERROR: THERE WAS A PROBLEM WHEN CREATING TABLE '${i}'!\n\n"
@@ -182,7 +182,7 @@ function copydb() {
     new_num_of_views_to_retry=0
 
     for create_view_statement in "${all_create_view_statements[@]}"; do # Here we use double quotes, as the elements are phrases, instead of single-words.
-      impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} -q "${create_view_statement}" |& tee error.log # impala-shell prints all logs in stderr, so wee need to capture them and put them in a file, in order to perform "grep" on them later
+      impala-shell -i ${IMPALA_HOSTNAME} -q "${create_view_statement}" |& tee error.log # impala-shell prints all logs in stderr, so wee need to capture them and put them in a file, in order to perform "grep" on them later
       specific_errors=`cat error.log | grep -E "FAILED: ParseException line 1:13 missing TABLE at 'view'|ERROR: AnalysisException: Could not resolve table reference:"`
       if [ -n "$specific_errors" ]; then
         echo -e "\nspecific_errors: ${specific_errors}\n"
@@ -212,7 +212,7 @@ function copydb() {
     previous_num_of_views_to_retry=$new_num_of_views_to_retry
   done
 
-  entities_on_impala=(`impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} --delimited -q "show tables in ${db}"`)
+  entities_on_impala=(`impala-shell -i ${IMPALA_HOSTNAME} --delimited -q "show tables in ${db}"`)
   echo -e "\nThe schema of db '${db}', along with ${#entities_on_impala[@]} entities have been created, on Impala cluster, after: $(print_elapsed_time start_create_schema_time)\n"
 
   start_compute_stats_time=$(date +%s)
@@ -222,9 +222,9 @@ function copydb() {
     create_view_statement=`hive -e "show create table ${db}.${i};" | grep "CREATE VIEW"`  # This grep works here, as we do not want to match multiple-lines.
     if [ -z "$create_view_statement" ]; then  # If it's a table, then go load the data to it.
       # Invalidate metadata of this DB's tables, in order for Impala to be aware of all parquet files put inside the tables' directories, previously, by "hadoop distcp".
-      impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} -q "INVALIDATE METADATA ${db}.${i}"
+      impala-shell -i ${IMPALA_HOSTNAME} -q "INVALIDATE METADATA ${db}.${i}"
       sleep 1
-      impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} -q "compute stats ${db}.${i}" |& tee error.log
+      impala-shell -i ${IMPALA_HOSTNAME} -q "compute stats ${db}.${i}" |& tee error.log
       log_errors=`cat error.log | grep -E "WARN|ERROR|FAILED"`
       if [ -n "$log_errors" ]; then
         echo -e "\n\nERROR: THERE WAS A PROBLEM WHEN COMPUTING STATS FOR TABLE '${i}'!\n\n"
diff --git a/dhp-workflows/dhp-stats-monitor-irish/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor-irish/oozie_app/copyDataToImpalaCluster.sh b/dhp-workflows/dhp-stats-monitor-irish/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor-irish/oozie_app/copyDataToImpalaCluster.sh
index 978cf4a9a..09ea1b393 100644
--- a/dhp-workflows/dhp-stats-monitor-irish/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor-irish/oozie_app/copyDataToImpalaCluster.sh
+++ b/dhp-workflows/dhp-stats-monitor-irish/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor-irish/oozie_app/copyDataToImpalaCluster.sh
@@ -63,7 +63,7 @@ function copydb() {
   start_db_time=$(date +%s)
 
   # Delete the old DB from Impala cluster (if exists).
-  impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} -q "drop database if exists ${db} cascade" |& tee error.log # impala-shell prints all logs in stderr, so wee need to capture them and put them in a file, in order to perform "grep" on them later
+  impala-shell -i ${IMPALA_HOSTNAME} -q "drop database if exists ${db} cascade" |& tee error.log # impala-shell prints all logs in stderr, so wee need to capture them and put them in a file, in order to perform "grep" on them later
   log_errors=`cat error.log | grep -E "WARN|ERROR|FAILED"`
   if [ -n "$log_errors" ]; then
     echo -e "\n\nERROR: THERE WAS A PROBLEM WHEN DROPPING THE OLD DATABASE FROM IMPALA CLUSTER! EXITING...\n\n"
@@ -120,7 +120,7 @@ function copydb() {
   start_create_schema_time=$(date +%s)
 
   # create the new database (with the same name)
-  impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} -q "create database ${db}"
+  impala-shell -i ${IMPALA_HOSTNAME} -q "create database ${db}"
 
   # Because "Hive" and "Impala" do not have compatible schemas, we cannot use the "show create table <name>" output from hive to create the exact same table in impala.
   # So, we have to find at least one parquet file (check if it's there) from the table in the ocean cluster for impala to use it to extract the table-schema itself from that file.
@@ -148,7 +148,7 @@ function copydb() {
             exit 5
           fi  # This error is not FATAL, do we do not return from this function, in normal circumstances.
       else
-        impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} -q "create table ${db}.${i} like parquet '${CURRENT_PRQ_FILE}' stored as parquet;" |& tee error.log
+        impala-shell -i ${IMPALA_HOSTNAME} -q "create table ${db}.${i} like parquet '${CURRENT_PRQ_FILE}' stored as parquet;" |& tee error.log
         log_errors=`cat error.log | grep -E "WARN|ERROR|FAILED"`
         if [ -n "$log_errors" ]; then
           echo -e "\n\nERROR: THERE WAS A PROBLEM WHEN CREATING TABLE '${i}'!\n\n"
@@ -182,7 +182,7 @@ function copydb() {
     new_num_of_views_to_retry=0
 
     for create_view_statement in "${all_create_view_statements[@]}"; do # Here we use double quotes, as the elements are phrases, instead of single-words.
-      impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} -q "${create_view_statement}" |& tee error.log # impala-shell prints all logs in stderr, so wee need to capture them and put them in a file, in order to perform "grep" on them later
+      impala-shell -i ${IMPALA_HOSTNAME} -q "${create_view_statement}" |& tee error.log # impala-shell prints all logs in stderr, so wee need to capture them and put them in a file, in order to perform "grep" on them later
       specific_errors=`cat error.log | grep -E "FAILED: ParseException line 1:13 missing TABLE at 'view'|ERROR: AnalysisException: Could not resolve table reference:"`
       if [ -n "$specific_errors" ]; then
         echo -e "\nspecific_errors: ${specific_errors}\n"
@@ -212,7 +212,7 @@ function copydb() {
     previous_num_of_views_to_retry=$new_num_of_views_to_retry
   done
 
-  entities_on_impala=(`impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} --delimited -q "show tables in ${db}"`)
+  entities_on_impala=(`impala-shell -i ${IMPALA_HOSTNAME} --delimited -q "show tables in ${db}"`)
   echo -e "\nThe schema of db '${db}', along with ${#entities_on_impala[@]} entities have been created, on Impala cluster, after: $(print_elapsed_time start_create_schema_time)\n"
 
   start_compute_stats_time=$(date +%s)
@@ -222,9 +222,9 @@ function copydb() {
     create_view_statement=`hive -e "show create table ${db}.${i};" | grep "CREATE VIEW"`  # This grep works here, as we do not want to match multiple-lines.
     if [ -z "$create_view_statement" ]; then  # If it's a table, then go load the data to it.
       # Invalidate metadata of this DB's tables, in order for Impala to be aware of all parquet files put inside the tables' directories, previously, by "hadoop distcp".
-      impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} -q "INVALIDATE METADATA ${db}.${i}"
+      impala-shell -i ${IMPALA_HOSTNAME} -q "INVALIDATE METADATA ${db}.${i}"
       sleep 1
-      impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} -q "compute stats ${db}.${i}" |& tee error.log
+      impala-shell -i ${IMPALA_HOSTNAME} -q "compute stats ${db}.${i}" |& tee error.log
       log_errors=`cat error.log | grep -E "WARN|ERROR|FAILED"`
       if [ -n "$log_errors" ]; then
         echo -e "\n\nERROR: THERE WAS A PROBLEM WHEN COMPUTING STATS FOR TABLE '${i}'!\n\n"
diff --git a/dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/copyDataToImpalaCluster.sh b/dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/copyDataToImpalaCluster.sh
index 55ae3114e..d75412df8 100644
--- a/dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/copyDataToImpalaCluster.sh
+++ b/dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/copyDataToImpalaCluster.sh
@@ -63,7 +63,7 @@ function copydb() {
   start_db_time=$(date +%s)
 
   # Delete the old DB from Impala cluster (if exists).
-  impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} -q "drop database if exists ${db} cascade" |& tee error.log # impala-shell prints all logs in stderr, so wee need to capture them and put them in a file, in order to perform "grep" on them later
+  impala-shell -i ${IMPALA_HOSTNAME} -q "drop database if exists ${db} cascade" |& tee error.log # impala-shell prints all logs in stderr, so wee need to capture them and put them in a file, in order to perform "grep" on them later
   log_errors=`cat error.log | grep -E "WARN|ERROR|FAILED"`
   if [ -n "$log_errors" ]; then
     echo -e "\n\nERROR: THERE WAS A PROBLEM WHEN DROPPING THE OLD DATABASE FROM IMPALA CLUSTER! EXITING...\n\n"
@@ -120,7 +120,7 @@ function copydb() {
   start_create_schema_time=$(date +%s)
 
   # create the new database (with the same name)
-  impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} -q "create database ${db}"
+  impala-shell -i ${IMPALA_HOSTNAME} -q "create database ${db}"
 
   # Because "Hive" and "Impala" do not have compatible schemas, we cannot use the "show create table <name>" output from hive to create the exact same table in impala.
   # So, we have to find at least one parquet file (check if it's there) from the table in the ocean cluster for impala to use it to extract the table-schema itself from that file.
@@ -148,7 +148,7 @@ function copydb() {
             exit 5
           fi  # This error is not FATAL, do we do not return from this function, in normal circumstances.
       else
-        impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} -q "create table ${db}.${i} like parquet '${CURRENT_PRQ_FILE}' stored as parquet;" |& tee error.log
+        impala-shell -i ${IMPALA_HOSTNAME} -q "create table ${db}.${i} like parquet '${CURRENT_PRQ_FILE}' stored as parquet;" |& tee error.log
         log_errors=`cat error.log | grep -E "WARN|ERROR|FAILED"`
         if [ -n "$log_errors" ]; then
           echo -e "\n\nERROR: THERE WAS A PROBLEM WHEN CREATING TABLE '${i}'!\n\n"
@@ -182,7 +182,7 @@ function copydb() {
     new_num_of_views_to_retry=0
 
     for create_view_statement in "${all_create_view_statements[@]}"; do # Here we use double quotes, as the elements are phrases, instead of single-words.
-      impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} -q "${create_view_statement}" |& tee error.log # impala-shell prints all logs in stderr, so wee need to capture them and put them in a file, in order to perform "grep" on them later
+      impala-shell -i ${IMPALA_HOSTNAME} -q "${create_view_statement}" |& tee error.log # impala-shell prints all logs in stderr, so wee need to capture them and put them in a file, in order to perform "grep" on them later
       specific_errors=`cat error.log | grep -E "FAILED: ParseException line 1:13 missing TABLE at 'view'|ERROR: AnalysisException: Could not resolve table reference:"`
       if [ -n "$specific_errors" ]; then
         echo -e "\nspecific_errors: ${specific_errors}\n"
@@ -212,7 +212,7 @@ function copydb() {
     previous_num_of_views_to_retry=$new_num_of_views_to_retry
   done
 
-  entities_on_impala=(`impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} --delimited -q "show tables in ${db}"`)
+  entities_on_impala=(`impala-shell -i ${IMPALA_HOSTNAME} --delimited -q "show tables in ${db}"`)
   echo -e "\nThe schema of db '${db}', along with ${#entities_on_impala[@]} entities have been created, on Impala cluster, after: $(print_elapsed_time start_create_schema_time)\n"
 
   start_compute_stats_time=$(date +%s)
@@ -222,9 +222,9 @@ function copydb() {
     create_view_statement=`hive -e "show create table ${db}.${i};" | grep "CREATE VIEW"`  # This grep works here, as we do not want to match multiple-lines.
     if [ -z "$create_view_statement" ]; then  # If it's a table, then go load the data to it.
       # Invalidate metadata of this DB's tables, in order for Impala to be aware of all parquet files put inside the tables' directories, previously, by "hadoop distcp".
-      impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} -q "INVALIDATE METADATA ${db}.${i}"
+      impala-shell -i ${IMPALA_HOSTNAME} -q "INVALIDATE METADATA ${db}.${i}"
       sleep 1
-      impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} -q "compute stats ${db}.${i}" |& tee error.log
+      impala-shell -i ${IMPALA_HOSTNAME} -q "compute stats ${db}.${i}" |& tee error.log
       log_errors=`cat error.log | grep -E "WARN|ERROR|FAILED"`
       if [ -n "$log_errors" ]; then
         echo -e "\n\nERROR: THERE WAS A PROBLEM WHEN COMPUTING STATS FOR TABLE '${i}'!\n\n"
diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/copyDataToImpalaCluster.sh b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/copyDataToImpalaCluster.sh
index 07a8a4534..96c61d91a 100644
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/copyDataToImpalaCluster.sh
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/copyDataToImpalaCluster.sh
@@ -65,7 +65,7 @@ function copydb() {
   start_db_time=$(date +%s)
 
   # Delete the old DB from Impala cluster (if exists).
-  impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} -q "drop database if exists ${db} cascade" |& tee error.log # impala-shell prints all logs in stderr, so wee need to capture them and put them in a file, in order to perform "grep" on them later
+  impala-shell -i ${IMPALA_HOSTNAME} -q "drop database if exists ${db} cascade" |& tee error.log # impala-shell prints all logs in stderr, so wee need to capture them and put them in a file, in order to perform "grep" on them later
   log_errors=`cat error.log | grep -E "WARN|ERROR|FAILED"`
   if [ -n "$log_errors" ]; then
     echo -e "\n\nERROR: THERE WAS A PROBLEM WHEN DROPPING THE OLD DATABASE FROM IMPALA CLUSTER! EXITING...\n\n"
@@ -122,7 +122,7 @@ function copydb() {
   start_create_schema_time=$(date +%s)
 
   # create the new database (with the same name)
-  impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} -q "create database ${db}"
+  impala-shell -i ${IMPALA_HOSTNAME} -q "create database ${db}"
 
   # Because "Hive" and "Impala" do not have compatible schemas, we cannot use the "show create table <name>" output from hive to create the exact same table in impala.
   # So, we have to find at least one parquet file (check if it's there) from the table in the ocean cluster for impala to use it to extract the table-schema itself from that file.
@@ -150,7 +150,7 @@ function copydb() {
             exit 5
           fi  # This error is not FATAL, do we do not return from this function, in normal circumstances.
       else
-        impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} -q "create table ${db}.${i} like parquet '${CURRENT_PRQ_FILE}' stored as parquet;" |& tee error.log
+        impala-shell -i ${IMPALA_HOSTNAME} -q "create table ${db}.${i} like parquet '${CURRENT_PRQ_FILE}' stored as parquet;" |& tee error.log
         log_errors=`cat error.log | grep -E "WARN|ERROR|FAILED"`
         if [ -n "$log_errors" ]; then
           echo -e "\n\nERROR: THERE WAS A PROBLEM WHEN CREATING TABLE '${i}'!\n\n"
@@ -184,7 +184,7 @@ function copydb() {
     new_num_of_views_to_retry=0
 
     for create_view_statement in "${all_create_view_statements[@]}"; do # Here we use double quotes, as the elements are phrases, instead of single-words.
-      impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} -q "${create_view_statement}" |& tee error.log # impala-shell prints all logs in stderr, so wee need to capture them and put them in a file, in order to perform "grep" on them later
+      impala-shell -i ${IMPALA_HOSTNAME} -q "${create_view_statement}" |& tee error.log # impala-shell prints all logs in stderr, so wee need to capture them and put them in a file, in order to perform "grep" on them later
       specific_errors=`cat error.log | grep -E "FAILED: ParseException line 1:13 missing TABLE at 'view'|ERROR: AnalysisException: Could not resolve table reference:"`
       if [ -n "$specific_errors" ]; then
         echo -e "\nspecific_errors: ${specific_errors}\n"
@@ -214,7 +214,7 @@ function copydb() {
     previous_num_of_views_to_retry=$new_num_of_views_to_retry
   done
 
-  entities_on_impala=(`impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} --delimited -q "show tables in ${db}"`)
+  entities_on_impala=(`impala-shell -i ${IMPALA_HOSTNAME} --delimited -q "show tables in ${db}"`)
   echo -e "\nThe schema of db '${db}', along with ${#entities_on_impala[@]} entities have been created, on Impala cluster, after: $(print_elapsed_time start_create_schema_time)\n"
 
   start_compute_stats_time=$(date +%s)
@@ -224,9 +224,9 @@ function copydb() {
     create_view_statement=`hive -e "show create table ${db}.${i};" | grep "CREATE VIEW"`  # This grep works here, as we do not want to match multiple-lines.
     if [ -z "$create_view_statement" ]; then  # If it's a table, then go load the data to it.
       # Invalidate metadata of this DB's tables, in order for Impala to be aware of all parquet files put inside the tables' directories, previously, by "hadoop distcp".
-      impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} -q "INVALIDATE METADATA ${db}.${i}"
+      impala-shell -i ${IMPALA_HOSTNAME} -q "INVALIDATE METADATA ${db}.${i}"
       sleep 1
-      impala-shell --user ${HADOOP_USER_NAME} -i ${IMPALA_HOSTNAME} -q "compute stats ${db}.${i}" |& tee error.log
+      impala-shell -i ${IMPALA_HOSTNAME} -q "compute stats ${db}.${i}" |& tee error.log
       log_errors=`cat error.log | grep -E "WARN|ERROR|FAILED"`
       if [ -n "$log_errors" ]; then
         echo -e "\n\nERROR: THERE WAS A PROBLEM WHEN COMPUTING STATS FOR TABLE '${i}'!\n\n"
@@ -271,8 +271,7 @@ copydb $MONITOR_DB'_institutions'
 copydb $MONITOR_DB'_ris_tail'
 
 contexts="knowmad::other dh-ch::other enermaps::other gotriple::other neanias-atmospheric::other rural-digital-europe::other covid-19::other aurora::other neanias-space::other north-america-studies::other north-american-studies::other eutopia::other"
-for i in ${contexts}
-do
-  tmp=`echo "$i"  | sed 's/'-'/'_'/g' | sed 's/'::'/'_'/g'`
+for i in ${contexts}; do
+  tmp=`echo "$i" | sed 's/'-'/'_'/g' | sed 's/'::'/'_'/g'`
   copydb ${MONITOR_DB}'_'${tmp}
 done
\ No newline at end of file
diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/createPDFsAggregated.sh b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/createPDFsAggregated.sh
index 46631a0c2..9eec0bb20 100644
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/createPDFsAggregated.sh
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/createPDFsAggregated.sh
@@ -6,21 +6,26 @@ then
     ln -sfn ${PYTHON_EGG_CACHE}${link_folder} ${link_folder}
 fi
 
+export HADOOP_USER_NAME=$3
+
+IMPALA_HOSTNAME='impala-cluster-dn1.openaire.eu'
+
 function createPDFsAggregated() {
   db=$1
 
-impala-shell --user $HADOOP_USER_NAME -i impala-cluster-dn1.openaire.eu -d ${db} -q "drop table if exists indi_is_result_accessible";
+  impala-shell -i ${IMPALA_HOSTNAME} -d ${db} -q "drop table if exists indi_is_result_accessible";
 
-impala-shell --user $HADOOP_USER_NAME -i impala-cluster-dn1.openaire.eu -d ${db} -q "create table indi_is_result_accessible stored as parquet as
+  impala-shell -i ${IMPALA_HOSTNAME} -d ${db} -q "create table indi_is_result_accessible stored as parquet as
     select distinct p.id, coalesce(is_result_accessible, 0) as is_result_accessible from result p
     left outer join
-    (select id, 1 as is_result_accessible from (select pl.* from result r
-    join pdfaggregation_i.publication p on r.id=p.id
-    join pdfaggregation_i.payload pl on pl.id=p.id
-    union all
-    select pl.* from result r
-    join pdfaggregation_i.publication p on r.id=p.dedupid
-    join pdfaggregation_i.payload pl on pl.id=p.id) foo) tmp on p.id=tmp.id";
+      (select id, 1 as is_result_accessible from (select pl.* from result r
+      join pdfaggregation_i.publication p on r.id=p.id
+      join pdfaggregation_i.payload pl on pl.id=p.id
+      union all
+      select pl.* from result r
+      join pdfaggregation_i.publication p on r.id=p.dedupid
+      join pdfaggregation_i.payload pl on pl.id=p.id) foo)
+      tmp on p.id=tmp.id";
 }
 
 STATS_DB=$1
@@ -35,8 +40,7 @@ createPDFsAggregated $MONITOR_DB'_institutions'
 createPDFsAggregated $MONITOR_DB'_ris_tail'
 
 contexts="knowmad::other dh-ch::other enermaps::other gotriple::other neanias-atmospheric::other rural-digital-europe::other covid-19::other aurora::other neanias-space::other north-america-studies::other north-american-studies::other eutopia::other"
-for i in ${contexts}
-do
-   tmp=`echo "$i"  | sed 's/'-'/'_'/g' | sed 's/'::'/'_'/g'`
+for i in ${contexts}; do
+  tmp=`echo "$i" | sed 's/'-'/'_'/g' | sed 's/'::'/'_'/g'`
   createPDFsAggregated ${MONITOR_DB}'_'${tmp}
 done
\ No newline at end of file