diff --git a/dhp-workflows/dhp-stats-hist-snaps/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-hist-snaps/oozie_app/copyDataToImpalaCluster.sh b/dhp-workflows/dhp-stats-hist-snaps/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-hist-snaps/oozie_app/copyDataToImpalaCluster.sh index ef80d0094..6250aca81 100644 --- a/dhp-workflows/dhp-stats-hist-snaps/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-hist-snaps/oozie_app/copyDataToImpalaCluster.sh +++ b/dhp-workflows/dhp-stats-hist-snaps/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-hist-snaps/oozie_app/copyDataToImpalaCluster.sh @@ -9,15 +9,27 @@ fi export HADOOP_USER_NAME=$2 IMPALA_HDFS_NODE='' -if hdfs dfs -test -e hdfs://impala-cluster-mn1.openaire.eu >/dev/null 2>&1; then - IMPALA_HDFS_NODE='hdfs://impala-cluster-mn1.openaire.eu:8020' -elif hdfs dfs -test -e hdfs://impala-cluster-mn2.openaire.eu >/dev/null 2>&1; then - IMPALA_HDFS_NODE='hdfs://impala-cluster-mn2.openaire.eu:8020' -else - echo -e "\n\nPROBLEM WHEN SETTING THE HDFS-NODE FOR IMPALA CLUSTER!\n\n" +COUNTER=0 + +while [ $COUNTER -lt 3 ]; do + if hdfs dfs -test -e hdfs://impala-cluster-mn1.openaire.eu/tmp >/dev/null 2>&1; then + IMPALA_HDFS_NODE='hdfs://impala-cluster-mn1.openaire.eu:8020' + break + elif hdfs dfs -test -e hdfs://impala-cluster-mn2.openaire.eu/tmp >/dev/null 2>&1; then + IMPALA_HDFS_NODE='hdfs://impala-cluster-mn2.openaire.eu:8020' + break + else + IMPALA_HDFS_NODE='' + sleep 1 + fi + ((COUNTER++)) +done + +if [ -z "$IMPALA_HDFS_NODE" ]; then + echo -e "\n\nPROBLEM WHEN SETTING THE HDFS-NODE FOR IMPALA CLUSTER! $COUNTER\n\n" exit 1 fi -echo "Active IMPALA HDFS Node: ${IMPALA_HDFS_NODE}" +echo "Active IMPALA HDFS Node: ${IMPALA_HDFS_NODE} , after ${COUNTER} retries." function copydb() { diff --git a/dhp-workflows/dhp-stats-monitor-irish/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor-irish/oozie_app/copyDataToImpalaCluster.sh b/dhp-workflows/dhp-stats-monitor-irish/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor-irish/oozie_app/copyDataToImpalaCluster.sh index e7d183ddb..97fa0dd9c 100644 --- a/dhp-workflows/dhp-stats-monitor-irish/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor-irish/oozie_app/copyDataToImpalaCluster.sh +++ b/dhp-workflows/dhp-stats-monitor-irish/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor-irish/oozie_app/copyDataToImpalaCluster.sh @@ -9,15 +9,28 @@ fi export HADOOP_USER_NAME=$2 IMPALA_HDFS_NODE='' -if hdfs dfs -test -e hdfs://impala-cluster-mn1.openaire.eu >/dev/null 2>&1; then - IMPALA_HDFS_NODE='hdfs://impala-cluster-mn1.openaire.eu:8020' -elif hdfs dfs -test -e hdfs://impala-cluster-mn2.openaire.eu >/dev/null 2>&1; then - IMPALA_HDFS_NODE='hdfs://impala-cluster-mn2.openaire.eu:8020' -else - echo -e "\n\nPROBLEM WHEN SETTING THE HDFS-NODE FOR IMPALA CLUSTER!\n\n" +COUNTER=0 + +while [ $COUNTER -lt 3 ]; do + if hdfs dfs -test -e hdfs://impala-cluster-mn1.openaire.eu/tmp >/dev/null 2>&1; then + IMPALA_HDFS_NODE='hdfs://impala-cluster-mn1.openaire.eu:8020' + break + elif hdfs dfs -test -e hdfs://impala-cluster-mn2.openaire.eu/tmp >/dev/null 2>&1; then + IMPALA_HDFS_NODE='hdfs://impala-cluster-mn2.openaire.eu:8020' + break + else + IMPALA_HDFS_NODE='' + sleep 1 + fi + ((COUNTER++)) +done + +if [ -z "$IMPALA_HDFS_NODE" ]; then + echo -e "\n\nPROBLEM WHEN SETTING THE HDFS-NODE FOR IMPALA CLUSTER! $COUNTER\n\n" exit 1 fi -echo "Active IMPALA HDFS Node: ${IMPALA_HDFS_NODE}" +echo "Active IMPALA HDFS Node: ${IMPALA_HDFS_NODE} , after ${COUNTER} retries." + function copydb() { diff --git a/dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/copyDataToImpalaCluster.sh b/dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/copyDataToImpalaCluster.sh index 148d9b0b6..81ac088c0 100644 --- a/dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/copyDataToImpalaCluster.sh +++ b/dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/copyDataToImpalaCluster.sh @@ -9,15 +9,28 @@ fi #export HADOOP_USER_NAME=$2 IMPALA_HDFS_NODE='' -if hdfs dfs -test -e hdfs://impala-cluster-mn1.openaire.eu >/dev/null 2>&1; then - IMPALA_HDFS_NODE='hdfs://impala-cluster-mn1.openaire.eu:8020' -elif hdfs dfs -test -e hdfs://impala-cluster-mn2.openaire.eu >/dev/null 2>&1; then - IMPALA_HDFS_NODE='hdfs://impala-cluster-mn2.openaire.eu:8020' -else - echo -e "\n\nPROBLEM WHEN SETTING THE HDFS-NODE FOR IMPALA CLUSTER!\n\n" +COUNTER=0 + +while [ $COUNTER -lt 3 ]; do + if hdfs dfs -test -e hdfs://impala-cluster-mn1.openaire.eu/tmp >/dev/null 2>&1; then + IMPALA_HDFS_NODE='hdfs://impala-cluster-mn1.openaire.eu:8020' + break + elif hdfs dfs -test -e hdfs://impala-cluster-mn2.openaire.eu/tmp >/dev/null 2>&1; then + IMPALA_HDFS_NODE='hdfs://impala-cluster-mn2.openaire.eu:8020' + break + else + IMPALA_HDFS_NODE='' + sleep 1 + fi + ((COUNTER++)) +done + +if [ -z "$IMPALA_HDFS_NODE" ]; then + echo -e "\n\nPROBLEM WHEN SETTING THE HDFS-NODE FOR IMPALA CLUSTER! $COUNTER\n\n" exit 1 fi -echo "Active IMPALA HDFS Node: ${IMPALA_HDFS_NODE}" +echo "Active IMPALA HDFS Node: ${IMPALA_HDFS_NODE} , after ${COUNTER} retries." + function copydb() { diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/copyDataToImpalaCluster.sh b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/copyDataToImpalaCluster.sh index 1996c0b03..3f8447b6c 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/copyDataToImpalaCluster.sh +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/copyDataToImpalaCluster.sh @@ -7,15 +7,28 @@ then fi IMPALA_HDFS_NODE='' -if hdfs dfs -test -e hdfs://impala-cluster-mn1.openaire.eu >/dev/null 2>&1; then - IMPALA_HDFS_NODE='hdfs://impala-cluster-mn1.openaire.eu:8020' -elif hdfs dfs -test -e hdfs://impala-cluster-mn2.openaire.eu >/dev/null 2>&1; then - IMPALA_HDFS_NODE='hdfs://impala-cluster-mn2.openaire.eu:8020' -else - echo -e "\n\nPROBLEM WHEN SETTING THE HDFS-NODE FOR IMPALA CLUSTER!\n\n" +COUNTER=0 + +while [ $COUNTER -lt 3 ]; do + if hdfs dfs -test -e hdfs://impala-cluster-mn1.openaire.eu/tmp >/dev/null 2>&1; then + IMPALA_HDFS_NODE='hdfs://impala-cluster-mn1.openaire.eu:8020' + break + elif hdfs dfs -test -e hdfs://impala-cluster-mn2.openaire.eu/tmp >/dev/null 2>&1; then + IMPALA_HDFS_NODE='hdfs://impala-cluster-mn2.openaire.eu:8020' + break + else + IMPALA_HDFS_NODE='' + sleep 1 + fi + ((COUNTER++)) +done + +if [ -z "$IMPALA_HDFS_NODE" ]; then + echo -e "\n\nPROBLEM WHEN SETTING THE HDFS-NODE FOR IMPALA CLUSTER! $COUNTER\n\n" exit 1 fi -echo "Active IMPALA HDFS Node: ${IMPALA_HDFS_NODE}" +echo "Active IMPALA HDFS Node: ${IMPALA_HDFS_NODE} , after ${COUNTER} retries." + export HADOOP_USER_NAME=$6 export PROD_USAGE_STATS_DB="openaire_prod_usage_stats" diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/monitor.sh b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/monitor.sh index 872456973..a5b6a54cb 100755 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/monitor.sh +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/monitor.sh @@ -85,12 +85,12 @@ hive $HIVE_OPTS --database ${2}_funded -e "show tables" | grep -v WARN | sed "s/ hive -f foo echo "Updated shadow monitor funded database" -echo "Updating shadow monitor insitutions database" +echo "Updating shadow monitor institutions database" hive -e "drop database if exists ${SHADOW}_institutions cascade" hive -e "create database if not exists ${SHADOW}_institutions" hive $HIVE_OPTS --database ${2}_institutions -e "show tables" | grep -v WARN | sed "s/\(.*\)/create view ${SHADOW}_institutions.\1 as select * from ${2}_institutions.\1;/" > foo hive -f foo -echo "Shadow db monitor insitutions ready!" +echo "Shadow db monitor institutions ready!" echo "Updating shadow monitor RIs database" for i in $contexts diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql index e614ffcbd..70cde6481 100755 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql @@ -335,8 +335,8 @@ select ar.organization, rf.no_result_fair/ar.no_allresults org_fairness from allresults ar join result_fair rf on rf.organization=ar.organization; /*EOS*/ -DROP VIEW result_fair; /*EOS*/ -DROP VIEW allresults; /*EOS*/ +DROP VIEW result_fair; +DROP VIEW allresults; CREATE TEMPORARY VIEW result_fair as select year, ro.organization organization, count(distinct ro.id) no_result_fair from ${stats_db_name}.result_organization ro @@ -1000,13 +1000,13 @@ left outer join ( drop table if exists ${stats_db_name}.result_country purge; /*EOS*/ create table ${stats_db_name}.result_country stored as parquet as -select distinct ro.id, coalesce(o.country, f.country) as country +select distinct ro.id, coalesce(o.country, f.country) from ${stats_db_name}.result_organization ro left outer join ${stats_db_name}.organization o on o.id=ro.organization left outer join ${stats_db_name}.result_projects rp on rp.id=ro.id left outer join ${stats_db_name}.project p on p.id=rp.project left outer join ${stats_db_name}.funder f on f.name=p.funder -where coalesce(o.country, f.country) IS NOT NULL; /*EOS*/ +where coalesce(o.country, f.country) IS NOT NULL; drop table if exists ${stats_db_name}.indi_result_oa_with_license purge; /*EOS*/ create table ${stats_db_name}.indi_result_oa_with_license stored as parquet as diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDBAll.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDBAll.sql index 120ec93c2..a8392b226 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDBAll.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDBAll.sql @@ -78,14 +78,14 @@ create table TARGET.result stored as parquet as 'openorgs____::4d4051b56708688235252f1d8fddb8c1', -- Iscte - Instituto Universitário de Lisboa 'openorgs____::5d55fb216b14691cf68218daf5d78cd9', -- Munster Technological University 'openorgs____::0fccc7640f0cb44d5cd1b06b312a06b9', -- Cardiff University - 'openorgs____::8839b55dae0c84d56fd533f52d5d483a', -- Leibniz Institute of Ecological Urban and Regional Development + 'openorgs____::8839b55dae0c84d56fd533f52d5d483a', -- Leibniz Institute of Ecological Urban and Regional Development 'openorgs____::526468206bca24c1c90da6a312295cf4', -- Cyprus University of Technology 'openorgs____::b5ca9d4340e26454e367e2908ef3872f', -- Alma Mater Studiorum University of Bologna - 'openorgs____::a6340e6ecf60f6bba163659df985b0f2', -- TU Dresden - 'openorgs____::64badd35233ba2cd4946368ef2f4cf57', -- University of Vienna + 'openorgs____::a6340e6ecf60f6bba163659df985b0f2', -- TU Dresden + 'openorgs____::64badd35233ba2cd4946368ef2f4cf57', -- University of Vienna 'openorgs____::7501d66d2297a963ebfb075c43fff88e', -- Royal Institute of Technology - 'openorgs____::d5eb679abdd31f70fcd4c8ba711148bf', -- Sorbonne University - 'openorgs____::b316f25380d106aac402f5ae8653910d' -- Centre for Research on Ecology and Forestry Applications + 'openorgs____::d5eb679abdd31f70fcd4c8ba711148bf', -- Sorbonne University + 'openorgs____::b316f25380d106aac402f5ae8653910d' -- Centre for Research on Ecology and Forestry Applications ) )) foo; create view if not exists TARGET.category as select * from SOURCE.category;