forked from D-Net/dnet-hadoop
Merge pull request '[BETA] fixed the result_country definition and updated the stats DB copy procedure' (#416) from antonis.lempesis/dnet-hadoop:beta into beta
Reviewed-on: D-Net/dnet-hadoop#416
This commit is contained in:
commit
26b97aa5ed
|
@ -9,15 +9,27 @@ fi
|
||||||
export HADOOP_USER_NAME=$2
|
export HADOOP_USER_NAME=$2
|
||||||
|
|
||||||
IMPALA_HDFS_NODE=''
|
IMPALA_HDFS_NODE=''
|
||||||
if hdfs dfs -test -e hdfs://impala-cluster-mn1.openaire.eu >/dev/null 2>&1; then
|
COUNTER=0
|
||||||
|
|
||||||
|
while [ $COUNTER -lt 3 ]; do
|
||||||
|
if hdfs dfs -test -e hdfs://impala-cluster-mn1.openaire.eu/tmp >/dev/null 2>&1; then
|
||||||
IMPALA_HDFS_NODE='hdfs://impala-cluster-mn1.openaire.eu:8020'
|
IMPALA_HDFS_NODE='hdfs://impala-cluster-mn1.openaire.eu:8020'
|
||||||
elif hdfs dfs -test -e hdfs://impala-cluster-mn2.openaire.eu >/dev/null 2>&1; then
|
break
|
||||||
|
elif hdfs dfs -test -e hdfs://impala-cluster-mn2.openaire.eu/tmp >/dev/null 2>&1; then
|
||||||
IMPALA_HDFS_NODE='hdfs://impala-cluster-mn2.openaire.eu:8020'
|
IMPALA_HDFS_NODE='hdfs://impala-cluster-mn2.openaire.eu:8020'
|
||||||
else
|
break
|
||||||
echo -e "\n\nPROBLEM WHEN SETTING THE HDFS-NODE FOR IMPALA CLUSTER!\n\n"
|
else
|
||||||
|
IMPALA_HDFS_NODE=''
|
||||||
|
sleep 1
|
||||||
|
fi
|
||||||
|
((COUNTER++))
|
||||||
|
done
|
||||||
|
|
||||||
|
if [ -z "$IMPALA_HDFS_NODE" ]; then
|
||||||
|
echo -e "\n\nPROBLEM WHEN SETTING THE HDFS-NODE FOR IMPALA CLUSTER! $COUNTER\n\n"
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
echo "Active IMPALA HDFS Node: ${IMPALA_HDFS_NODE}"
|
echo "Active IMPALA HDFS Node: ${IMPALA_HDFS_NODE} , after ${COUNTER} retries."
|
||||||
|
|
||||||
|
|
||||||
function copydb() {
|
function copydb() {
|
||||||
|
|
|
@ -9,15 +9,28 @@ fi
|
||||||
export HADOOP_USER_NAME=$2
|
export HADOOP_USER_NAME=$2
|
||||||
|
|
||||||
IMPALA_HDFS_NODE=''
|
IMPALA_HDFS_NODE=''
|
||||||
if hdfs dfs -test -e hdfs://impala-cluster-mn1.openaire.eu >/dev/null 2>&1; then
|
COUNTER=0
|
||||||
|
|
||||||
|
while [ $COUNTER -lt 3 ]; do
|
||||||
|
if hdfs dfs -test -e hdfs://impala-cluster-mn1.openaire.eu/tmp >/dev/null 2>&1; then
|
||||||
IMPALA_HDFS_NODE='hdfs://impala-cluster-mn1.openaire.eu:8020'
|
IMPALA_HDFS_NODE='hdfs://impala-cluster-mn1.openaire.eu:8020'
|
||||||
elif hdfs dfs -test -e hdfs://impala-cluster-mn2.openaire.eu >/dev/null 2>&1; then
|
break
|
||||||
|
elif hdfs dfs -test -e hdfs://impala-cluster-mn2.openaire.eu/tmp >/dev/null 2>&1; then
|
||||||
IMPALA_HDFS_NODE='hdfs://impala-cluster-mn2.openaire.eu:8020'
|
IMPALA_HDFS_NODE='hdfs://impala-cluster-mn2.openaire.eu:8020'
|
||||||
else
|
break
|
||||||
echo -e "\n\nPROBLEM WHEN SETTING THE HDFS-NODE FOR IMPALA CLUSTER!\n\n"
|
else
|
||||||
|
IMPALA_HDFS_NODE=''
|
||||||
|
sleep 1
|
||||||
|
fi
|
||||||
|
((COUNTER++))
|
||||||
|
done
|
||||||
|
|
||||||
|
if [ -z "$IMPALA_HDFS_NODE" ]; then
|
||||||
|
echo -e "\n\nPROBLEM WHEN SETTING THE HDFS-NODE FOR IMPALA CLUSTER! $COUNTER\n\n"
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
echo "Active IMPALA HDFS Node: ${IMPALA_HDFS_NODE}"
|
echo "Active IMPALA HDFS Node: ${IMPALA_HDFS_NODE} , after ${COUNTER} retries."
|
||||||
|
|
||||||
|
|
||||||
function copydb() {
|
function copydb() {
|
||||||
|
|
||||||
|
|
|
@ -9,15 +9,28 @@ fi
|
||||||
#export HADOOP_USER_NAME=$2
|
#export HADOOP_USER_NAME=$2
|
||||||
|
|
||||||
IMPALA_HDFS_NODE=''
|
IMPALA_HDFS_NODE=''
|
||||||
if hdfs dfs -test -e hdfs://impala-cluster-mn1.openaire.eu >/dev/null 2>&1; then
|
COUNTER=0
|
||||||
|
|
||||||
|
while [ $COUNTER -lt 3 ]; do
|
||||||
|
if hdfs dfs -test -e hdfs://impala-cluster-mn1.openaire.eu/tmp >/dev/null 2>&1; then
|
||||||
IMPALA_HDFS_NODE='hdfs://impala-cluster-mn1.openaire.eu:8020'
|
IMPALA_HDFS_NODE='hdfs://impala-cluster-mn1.openaire.eu:8020'
|
||||||
elif hdfs dfs -test -e hdfs://impala-cluster-mn2.openaire.eu >/dev/null 2>&1; then
|
break
|
||||||
|
elif hdfs dfs -test -e hdfs://impala-cluster-mn2.openaire.eu/tmp >/dev/null 2>&1; then
|
||||||
IMPALA_HDFS_NODE='hdfs://impala-cluster-mn2.openaire.eu:8020'
|
IMPALA_HDFS_NODE='hdfs://impala-cluster-mn2.openaire.eu:8020'
|
||||||
else
|
break
|
||||||
echo -e "\n\nPROBLEM WHEN SETTING THE HDFS-NODE FOR IMPALA CLUSTER!\n\n"
|
else
|
||||||
|
IMPALA_HDFS_NODE=''
|
||||||
|
sleep 1
|
||||||
|
fi
|
||||||
|
((COUNTER++))
|
||||||
|
done
|
||||||
|
|
||||||
|
if [ -z "$IMPALA_HDFS_NODE" ]; then
|
||||||
|
echo -e "\n\nPROBLEM WHEN SETTING THE HDFS-NODE FOR IMPALA CLUSTER! $COUNTER\n\n"
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
echo "Active IMPALA HDFS Node: ${IMPALA_HDFS_NODE}"
|
echo "Active IMPALA HDFS Node: ${IMPALA_HDFS_NODE} , after ${COUNTER} retries."
|
||||||
|
|
||||||
|
|
||||||
function copydb() {
|
function copydb() {
|
||||||
|
|
||||||
|
|
|
@ -7,15 +7,28 @@ then
|
||||||
fi
|
fi
|
||||||
|
|
||||||
IMPALA_HDFS_NODE=''
|
IMPALA_HDFS_NODE=''
|
||||||
if hdfs dfs -test -e hdfs://impala-cluster-mn1.openaire.eu >/dev/null 2>&1; then
|
COUNTER=0
|
||||||
|
|
||||||
|
while [ $COUNTER -lt 3 ]; do
|
||||||
|
if hdfs dfs -test -e hdfs://impala-cluster-mn1.openaire.eu/tmp >/dev/null 2>&1; then
|
||||||
IMPALA_HDFS_NODE='hdfs://impala-cluster-mn1.openaire.eu:8020'
|
IMPALA_HDFS_NODE='hdfs://impala-cluster-mn1.openaire.eu:8020'
|
||||||
elif hdfs dfs -test -e hdfs://impala-cluster-mn2.openaire.eu >/dev/null 2>&1; then
|
break
|
||||||
|
elif hdfs dfs -test -e hdfs://impala-cluster-mn2.openaire.eu/tmp >/dev/null 2>&1; then
|
||||||
IMPALA_HDFS_NODE='hdfs://impala-cluster-mn2.openaire.eu:8020'
|
IMPALA_HDFS_NODE='hdfs://impala-cluster-mn2.openaire.eu:8020'
|
||||||
else
|
break
|
||||||
echo -e "\n\nPROBLEM WHEN SETTING THE HDFS-NODE FOR IMPALA CLUSTER!\n\n"
|
else
|
||||||
|
IMPALA_HDFS_NODE=''
|
||||||
|
sleep 1
|
||||||
|
fi
|
||||||
|
((COUNTER++))
|
||||||
|
done
|
||||||
|
|
||||||
|
if [ -z "$IMPALA_HDFS_NODE" ]; then
|
||||||
|
echo -e "\n\nPROBLEM WHEN SETTING THE HDFS-NODE FOR IMPALA CLUSTER! $COUNTER\n\n"
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
echo "Active IMPALA HDFS Node: ${IMPALA_HDFS_NODE}"
|
echo "Active IMPALA HDFS Node: ${IMPALA_HDFS_NODE} , after ${COUNTER} retries."
|
||||||
|
|
||||||
|
|
||||||
export HADOOP_USER_NAME=$6
|
export HADOOP_USER_NAME=$6
|
||||||
export PROD_USAGE_STATS_DB="openaire_prod_usage_stats"
|
export PROD_USAGE_STATS_DB="openaire_prod_usage_stats"
|
||||||
|
|
|
@ -85,12 +85,12 @@ hive $HIVE_OPTS --database ${2}_funded -e "show tables" | grep -v WARN | sed "s/
|
||||||
hive -f foo
|
hive -f foo
|
||||||
echo "Updated shadow monitor funded database"
|
echo "Updated shadow monitor funded database"
|
||||||
|
|
||||||
echo "Updating shadow monitor insitutions database"
|
echo "Updating shadow monitor institutions database"
|
||||||
hive -e "drop database if exists ${SHADOW}_institutions cascade"
|
hive -e "drop database if exists ${SHADOW}_institutions cascade"
|
||||||
hive -e "create database if not exists ${SHADOW}_institutions"
|
hive -e "create database if not exists ${SHADOW}_institutions"
|
||||||
hive $HIVE_OPTS --database ${2}_institutions -e "show tables" | grep -v WARN | sed "s/\(.*\)/create view ${SHADOW}_institutions.\1 as select * from ${2}_institutions.\1;/" > foo
|
hive $HIVE_OPTS --database ${2}_institutions -e "show tables" | grep -v WARN | sed "s/\(.*\)/create view ${SHADOW}_institutions.\1 as select * from ${2}_institutions.\1;/" > foo
|
||||||
hive -f foo
|
hive -f foo
|
||||||
echo "Shadow db monitor insitutions ready!"
|
echo "Shadow db monitor institutions ready!"
|
||||||
|
|
||||||
echo "Updating shadow monitor RIs database"
|
echo "Updating shadow monitor RIs database"
|
||||||
for i in $contexts
|
for i in $contexts
|
||||||
|
|
|
@ -335,8 +335,8 @@ select ar.organization, rf.no_result_fair/ar.no_allresults org_fairness
|
||||||
from allresults ar
|
from allresults ar
|
||||||
join result_fair rf on rf.organization=ar.organization; /*EOS*/
|
join result_fair rf on rf.organization=ar.organization; /*EOS*/
|
||||||
|
|
||||||
DROP VIEW result_fair; /*EOS*/
|
DROP VIEW result_fair;
|
||||||
DROP VIEW allresults; /*EOS*/
|
DROP VIEW allresults;
|
||||||
|
|
||||||
CREATE TEMPORARY VIEW result_fair as
|
CREATE TEMPORARY VIEW result_fair as
|
||||||
select year, ro.organization organization, count(distinct ro.id) no_result_fair from ${stats_db_name}.result_organization ro
|
select year, ro.organization organization, count(distinct ro.id) no_result_fair from ${stats_db_name}.result_organization ro
|
||||||
|
@ -1000,13 +1000,13 @@ left outer join (
|
||||||
drop table if exists ${stats_db_name}.result_country purge; /*EOS*/
|
drop table if exists ${stats_db_name}.result_country purge; /*EOS*/
|
||||||
|
|
||||||
create table ${stats_db_name}.result_country stored as parquet as
|
create table ${stats_db_name}.result_country stored as parquet as
|
||||||
select distinct ro.id, coalesce(o.country, f.country) as country
|
select distinct ro.id, coalesce(o.country, f.country)
|
||||||
from ${stats_db_name}.result_organization ro
|
from ${stats_db_name}.result_organization ro
|
||||||
left outer join ${stats_db_name}.organization o on o.id=ro.organization
|
left outer join ${stats_db_name}.organization o on o.id=ro.organization
|
||||||
left outer join ${stats_db_name}.result_projects rp on rp.id=ro.id
|
left outer join ${stats_db_name}.result_projects rp on rp.id=ro.id
|
||||||
left outer join ${stats_db_name}.project p on p.id=rp.project
|
left outer join ${stats_db_name}.project p on p.id=rp.project
|
||||||
left outer join ${stats_db_name}.funder f on f.name=p.funder
|
left outer join ${stats_db_name}.funder f on f.name=p.funder
|
||||||
where coalesce(o.country, f.country) IS NOT NULL; /*EOS*/
|
where coalesce(o.country, f.country) IS NOT NULL;
|
||||||
|
|
||||||
drop table if exists ${stats_db_name}.indi_result_oa_with_license purge; /*EOS*/
|
drop table if exists ${stats_db_name}.indi_result_oa_with_license purge; /*EOS*/
|
||||||
create table ${stats_db_name}.indi_result_oa_with_license stored as parquet as
|
create table ${stats_db_name}.indi_result_oa_with_license stored as parquet as
|
||||||
|
|
Loading…
Reference in New Issue