From bc8c97182d5f495c6d7ceb0947bcdaa83bb14251 Mon Sep 17 00:00:00 2001 From: LSmyrnaios Date: Tue, 26 Mar 2024 13:01:12 +0200 Subject: [PATCH] Automatically select the ACTIVE HDFS NODE for Impala cluster, in all "copyDataToImpalaCluster.sh" scripts. --- .../oozie_app/copyDataToImpalaCluster.sh | 16 ++++++++++++++-- .../oozie_app/copyDataToImpalaCluster.sh | 15 +++++++++++++-- .../oozie_app/copyDataToImpalaCluster.sh | 15 +++++++++++++-- 3 files changed, 40 insertions(+), 6 deletions(-) diff --git a/dhp-workflows/dhp-stats-hist-snaps/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-hist-snaps/oozie_app/copyDataToImpalaCluster.sh b/dhp-workflows/dhp-stats-hist-snaps/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-hist-snaps/oozie_app/copyDataToImpalaCluster.sh index ef9bb9495..ef80d0094 100644 --- a/dhp-workflows/dhp-stats-hist-snaps/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-hist-snaps/oozie_app/copyDataToImpalaCluster.sh +++ b/dhp-workflows/dhp-stats-hist-snaps/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-hist-snaps/oozie_app/copyDataToImpalaCluster.sh @@ -8,12 +8,24 @@ fi export HADOOP_USER_NAME=$2 +IMPALA_HDFS_NODE='' +if hdfs dfs -test -e hdfs://impala-cluster-mn1.openaire.eu >/dev/null 2>&1; then + IMPALA_HDFS_NODE='hdfs://impala-cluster-mn1.openaire.eu:8020' +elif hdfs dfs -test -e hdfs://impala-cluster-mn2.openaire.eu >/dev/null 2>&1; then + IMPALA_HDFS_NODE='hdfs://impala-cluster-mn2.openaire.eu:8020' +else + echo -e "\n\nPROBLEM WHEN SETTING THE HDFS-NODE FOR IMPALA CLUSTER!\n\n" + exit 1 +fi +echo "Active IMPALA HDFS Node: ${IMPALA_HDFS_NODE}" + + function copydb() { db=$1 FILE=("hive_wf_tmp_"$RANDOM) - hdfs dfs -mkdir hdfs://impala-cluster-mn1.openaire.eu:8020/tmp/$FILE/ + hdfs dfs -mkdir ${IMPALA_HDFS_NODE}/tmp/$FILE/ # change ownership to impala # hdfs dfs -conf /etc/impala_cluster/hdfs-site.xml -chmod -R 777 /tmp/$FILE/${db}.db @@ -22,7 +34,7 @@ function copydb() { # copy the databases from ocean to impala echo "copying $db" - hadoop distcp 
-Dmapreduce.map.memory.mb=6144 -pb hdfs://nameservice1/user/hive/warehouse/${db}.db hdfs://impala-cluster-mn1.openaire.eu:8020/tmp/$FILE/ + hadoop distcp -Dmapreduce.map.memory.mb=6144 -pb hdfs://nameservice1/user/hive/warehouse/${db}.db ${IMPALA_HDFS_NODE}/tmp/$FILE/ hdfs dfs -conf /etc/impala_cluster/hdfs-site.xml -chmod -R 777 /tmp/$FILE/${db}.db diff --git a/dhp-workflows/dhp-stats-monitor-irish/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor-irish/oozie_app/copyDataToImpalaCluster.sh b/dhp-workflows/dhp-stats-monitor-irish/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor-irish/oozie_app/copyDataToImpalaCluster.sh index 78c44ed20..e7d183ddb 100644 --- a/dhp-workflows/dhp-stats-monitor-irish/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor-irish/oozie_app/copyDataToImpalaCluster.sh +++ b/dhp-workflows/dhp-stats-monitor-irish/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor-irish/oozie_app/copyDataToImpalaCluster.sh @@ -8,11 +8,22 @@ fi export HADOOP_USER_NAME=$2 +IMPALA_HDFS_NODE='' +if hdfs dfs -test -e hdfs://impala-cluster-mn1.openaire.eu >/dev/null 2>&1; then + IMPALA_HDFS_NODE='hdfs://impala-cluster-mn1.openaire.eu:8020' +elif hdfs dfs -test -e hdfs://impala-cluster-mn2.openaire.eu >/dev/null 2>&1; then + IMPALA_HDFS_NODE='hdfs://impala-cluster-mn2.openaire.eu:8020' +else + echo -e "\n\nPROBLEM WHEN SETTING THE HDFS-NODE FOR IMPALA CLUSTER!\n\n" + exit 1 +fi +echo "Active IMPALA HDFS Node: ${IMPALA_HDFS_NODE}" + function copydb() { db=$1 FILE=("hive_wf_tmp_"$RANDOM) - hdfs dfs -mkdir hdfs://impala-cluster-mn1.openaire.eu:8020/tmp/$FILE/ + hdfs dfs -mkdir ${IMPALA_HDFS_NODE}/tmp/$FILE/ # change ownership to impala # hdfs dfs -conf /etc/impala_cluster/hdfs-site.xml -chmod -R 777 /tmp/$FILE/${db}.db @@ -21,7 +32,7 @@ function copydb() { # copy the databases from ocean to impala echo "copying $db" - hadoop distcp -Dmapreduce.map.memory.mb=6144 -pb hdfs://nameservice1/user/hive/warehouse/${db}.db 
hdfs://impala-cluster-mn1.openaire.eu:8020/tmp/$FILE/ + hadoop distcp -Dmapreduce.map.memory.mb=6144 -pb hdfs://nameservice1/user/hive/warehouse/${db}.db ${IMPALA_HDFS_NODE}/tmp/$FILE/ hdfs dfs -conf /etc/impala_cluster/hdfs-site.xml -chmod -R 777 /tmp/$FILE/${db}.db diff --git a/dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/copyDataToImpalaCluster.sh b/dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/copyDataToImpalaCluster.sh index 1587f7152..148d9b0b6 100644 --- a/dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/copyDataToImpalaCluster.sh +++ b/dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/copyDataToImpalaCluster.sh @@ -8,6 +8,17 @@ fi #export HADOOP_USER_NAME=$2 +IMPALA_HDFS_NODE='' +if hdfs dfs -test -e hdfs://impala-cluster-mn1.openaire.eu >/dev/null 2>&1; then + IMPALA_HDFS_NODE='hdfs://impala-cluster-mn1.openaire.eu:8020' +elif hdfs dfs -test -e hdfs://impala-cluster-mn2.openaire.eu >/dev/null 2>&1; then + IMPALA_HDFS_NODE='hdfs://impala-cluster-mn2.openaire.eu:8020' +else + echo -e "\n\nPROBLEM WHEN SETTING THE HDFS-NODE FOR IMPALA CLUSTER!\n\n" + exit 1 +fi +echo "Active IMPALA HDFS Node: ${IMPALA_HDFS_NODE}" + function copydb() { export HADOOP_USER="dimitris.pierrakos" @@ -15,7 +26,7 @@ function copydb() { db=$1 FILE=("hive_wf_tmp_"$RANDOM) - hdfs dfs -mkdir hdfs://impala-cluster-mn1.openaire.eu:8020/tmp/$FILE/ + hdfs dfs -mkdir ${IMPALA_HDFS_NODE}/tmp/$FILE/ # change ownership to impala # hdfs dfs -conf /etc/impala_cluster/hdfs-site.xml -chmod -R 777 /tmp/$FILE/${db}.db @@ -24,7 +35,7 @@ function copydb() { # copy the databases from ocean to impala echo "copying $db" - hadoop distcp -Dmapreduce.map.memory.mb=6144 -pb hdfs://nameservice1/user/hive/warehouse/${db}.db hdfs://impala-cluster-mn1.openaire.eu:8020/tmp/$FILE/ +
hadoop distcp -Dmapreduce.map.memory.mb=6144 -pb hdfs://nameservice1/user/hive/warehouse/${db}.db ${IMPALA_HDFS_NODE}/tmp/$FILE/ hdfs dfs -conf /etc/impala_cluster/hdfs-site.xml -chmod -R 777 /tmp/$FILE/${db}.db