Merge pull request 'Update step20-createMonitorDB_institutions.sql' (#309) from antonis.lempesis/dnet-hadoop:beta into beta

Reviewed-on: D-Net/dnet-hadoop#309
This commit is contained in:
Claudio Atzori 2023-06-20 15:07:09 +02:00
commit 0561362de2
6 changed files with 54 additions and 8 deletions

View File

@ -10,7 +10,7 @@ export SOURCE=$1
export PRODUCTION=$2 export PRODUCTION=$2
echo "Updating ${PRODUCTION} database" echo "Updating ${PRODUCTION} database"
impala-shell -q "create database if not exists ${PRODUCTION}" impala-shell -i impala-cluster-dn1.openaire.eu -q "create database if not exists ${PRODUCTION}"
impala-shell -d ${PRODUCTION} -q "show tables" --delimited | sed "s/^/drop view if exists ${PRODUCTION}./" | sed "s/$/;/" | impala-shell -c -f - impala-shell -i impala-cluster-dn1.openaire.eu -d ${PRODUCTION} -q "show tables" --delimited | sed "s/^/drop view if exists ${PRODUCTION}./" | sed "s/$/;/" | impala-shell -c -f -
impala-shell -d ${SOURCE} -q "show tables" --delimited | sed "s/\(.*\)/create view ${PRODUCTION}.\1 as select * from ${SOURCE}.\1;/" | impala-shell -c -f - impala-shell -i impala-cluster-dn1.openaire.eu -d ${SOURCE} -q "show tables" --delimited | sed "s/\(.*\)/create view ${PRODUCTION}.\1 as select * from ${SOURCE}.\1;/" | impala-shell -c -f -
echo "Production db ready!" echo "Production db ready!"

View File

@ -0,0 +1,38 @@
# Refreshes the production views of the monitor databases.
#
# Usage: updateProductionViewsMonitor.sh <source_db> <production_db>
#
# For each monitor database suffix (funded, institutions, ris_tail and one per
# context listed at the bottom) the script:
#   1. creates the database <production_db>_<suffix> if it does not exist,
#   2. drops every view currently listed in it,
#   3. creates one view per table of <source_db>_<suffix>, each defined as
#      "select * from <source_db>_<suffix>.<table>".

# Point impala-shell's python egg cache to a per-user folder, reachable
# through a symlink under /tmp.
export PYTHON_EGG_CACHE=/home/$(whoami)/.python-eggs
export link_folder=/tmp/impala-shell-python-egg-cache-$(whoami)
if ! [ -L $link_folder ]
then
    rm -Rf "$link_folder"
    ln -sfn ${PYTHON_EGG_CACHE}${link_folder} ${link_folder}
fi

export SOURCE=$1
export PRODUCTION=$2

IMPALA_HOST=impala-cluster-dn1.openaire.eu

# refresh_views <suffix> <label>
#   suffix: database suffix appended (after an underscore) to both ${SOURCE}
#           and ${PRODUCTION}
#   label:  human readable name used in the final log message
#
# The database names are assembled in plain variables before being used.
# The previous version wrote ${PRODUCTION}'_suffix' inside double-quoted
# strings, where single quotes are literal characters, so the generated SQL
# (and the log messages) contained stray quotes, e.g.
#   create database if not exists prod'_funded'
refresh_views() {
    rv_suffix=$1
    rv_label=$2
    rv_source_db="${SOURCE}_${rv_suffix}"
    rv_production_db="${PRODUCTION}_${rv_suffix}"

    echo "Updating ${rv_production_db} database"
    impala-shell -i ${IMPALA_HOST} -q "create database if not exists ${rv_production_db}"
    # NOTE(review): the impala-shell at the end of each pipe has no -i option,
    # exactly as before, so it connects to impala-shell's default host.
    # Confirm whether it should target ${IMPALA_HOST} as well.
    impala-shell -i ${IMPALA_HOST} -d ${rv_production_db} -q "show tables" --delimited | sed "s/^/drop view if exists ${rv_production_db}./" | sed "s/$/;/" | impala-shell -c -f -
    impala-shell -i ${IMPALA_HOST} -d ${rv_source_db} -q "show tables" --delimited | sed "s/\(.*\)/create view ${rv_production_db}.\1 as select * from ${rv_source_db}.\1;/" | impala-shell -c -f -
    echo "Production ${rv_label} db ready!"
}

refresh_views "funded" "funded"
refresh_views "institutions" "institutions"
# The source database used to be addressed both as _RIs_tail and _ris_tail;
# Impala identifiers are case-insensitive, so a single lowercase spelling is used.
refresh_views "ris_tail" "RIS tail"

contexts="knowmad::other dh-ch::other enermaps::other gotriple::other neanias-atmospheric::other rural-digital-europe::other covid-19::other aurora::other neanias-space::other north-america-studies::other north-american-studies::other eutopia::other"
for i in ${contexts}
do
    # Turn a context id into a database suffix: '-' and '::' both become '_'.
    tmp=$(echo "$i" | sed -e 's/-/_/g' -e 's/::/_/g')
    refresh_views "${tmp}" "${tmp}"
done

View File

@ -80,10 +80,10 @@
<shell xmlns="uri:oozie:shell-action:0.1"> <shell xmlns="uri:oozie:shell-action:0.1">
<job-tracker>${jobTracker}</job-tracker> <job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node> <name-node>${nameNode}</name-node>
<exec>updateProductionViews.sh</exec> <exec>updateProductionViewsMonitor.sh</exec>
<argument>${monitor_db_name}</argument> <argument>${monitor_db_name}</argument>
<argument>${monitor_db_production_name}</argument> <argument>${monitor_db_production_name}</argument>
<file>updateProductionViews.sh</file> <file>updateProductionViewsMonitor.sh</file>
</shell> </shell>
<ok to="updateObservatoryViews"/> <ok to="updateObservatoryViews"/>
<error to="Kill"/> <error to="Kill"/>

View File

@ -10,6 +10,12 @@ function createShadowDB() {
SOURCE=$1 SOURCE=$1
SHADOW=$2 SHADOW=$2
# Drop every object listed by "show tables" in the shadow db, one
# "drop view" statement per name, before the database itself is dropped.
# The drop command is run directly: the duplicated "-d -d" made impala-shell
# take "-d" as the database name, and wrapping the call in backticks would
# execute whatever it printed as a shell command.
for i in `impala-shell -i impala-cluster-dn1.openaire.eu -d ${SHADOW} --delimited -q "show tables"`;
do
    impala-shell -i impala-cluster-dn1.openaire.eu -d ${SHADOW} -q "drop view $i;";
done
impala-shell -i impala-cluster-dn1.openaire.eu -q "drop database ${SHADOW} CASCADE"; impala-shell -i impala-cluster-dn1.openaire.eu -q "drop database ${SHADOW} CASCADE";
impala-shell -i impala-cluster-dn1.openaire.eu -q "create database if not exists ${SHADOW}"; impala-shell -i impala-cluster-dn1.openaire.eu -q "create database if not exists ${SHADOW}";
# impala-shell -i impala-cluster-dn1.openaire.eu -d ${SHADOW} -q "show tables" | sed "s/^/drop view if exists ${SHADOW}./" | sed "s/$/;/" | impala-shell -i impala-cluster-dn1.openaire.eu -f - # impala-shell -i impala-cluster-dn1.openaire.eu -d ${SHADOW} -q "show tables" | sed "s/^/drop view if exists ${SHADOW}./" | sed "s/$/;/" | impala-shell -i impala-cluster-dn1.openaire.eu -f -

View File

@ -35,8 +35,8 @@ select * from ${stats_db_name}.software_refereed
union all union all
select * from ${stats_db_name}.otherresearchproduct_refereed; select * from ${stats_db_name}.otherresearchproduct_refereed;
create table if not exists indi_impact_measures as create table if not exists indi_impact_measures STORED AS PARQUET as
select distinct substr(id, 4) as id, measures_ids.id impactmetric, measures_ids.unit.value[0] score, select substr(id, 4) as id, measures_ids.id impactmetric, measures_ids.unit.value[0] score,
cast(measures_ids.unit.value[0] as decimal(6,3)) score_dec, measures_ids.unit.value[1] class cast(measures_ids.unit.value[0] as decimal(6,3)) score_dec, measures_ids.unit.value[1] class
from ${openaire_db_name}.result lateral view explode(measures) measures as measures_ids from ${openaire_db_name}.result lateral view explode(measures) measures as measures_ids
where measures_ids.id!='views' and measures_ids.id!='downloads'; where measures_ids.id!='views' and measures_ids.id!='downloads';

View File

@ -50,7 +50,9 @@ create table TARGET.result stored as parquet as
'openorgs____::846cb428d3f52a445f7275561a7beb5d', -- University of Manitoba 'openorgs____::846cb428d3f52a445f7275561a7beb5d', -- University of Manitoba
'openorgs____::eb391317ed0dc684aa81ac16265de041', -- Universitat Rovira i Virgili 'openorgs____::eb391317ed0dc684aa81ac16265de041', -- Universitat Rovira i Virgili
'openorgs____::66aa9fc2fceb271423dfabcc38752dc0', -- Lund University 'openorgs____::66aa9fc2fceb271423dfabcc38752dc0', -- Lund University
'openorgs____::3cff625a4370d51e08624cc586138b2f' -- IMT Atlantique 'openorgs____::3cff625a4370d51e08624cc586138b2f', -- IMT Atlantique
'openorgs____::c0b262bd6eab819e4c994914f9c010e2', -- National Institute of Geophysics and Volcanology
'openorgs____::1624ff7c01bb641b91f4518539a0c28a' -- Vrije Universiteit Amsterdam
))) foo; ))) foo;
ANALYZE TABLE TARGET.result COMPUTE STATISTICS; ANALYZE TABLE TARGET.result COMPUTE STATISTICS;