From afcad083967d6b0c3b177410752d0e465a7b7e20 Mon Sep 17 00:00:00 2001 From: dimitrispie Date: Thu, 15 Jun 2023 10:28:49 +0300 Subject: [PATCH 1/5] Update step20-createMonitorDB_institutions.sql Added openorgs____::c0b262bd6eab819e4c994914f9c010e2 -- National Institute of Geophysics and Volcanology --- .../oozie_app/scripts/step20-createMonitorDB_institutions.sql | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB_institutions.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB_institutions.sql index 121ee6e7f..bb96d516c 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB_institutions.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB_institutions.sql @@ -50,7 +50,8 @@ create table TARGET.result stored as parquet as 'openorgs____::846cb428d3f52a445f7275561a7beb5d', -- University of Manitoba 'openorgs____::eb391317ed0dc684aa81ac16265de041', -- Universitat Rovira i Virgili 'openorgs____::66aa9fc2fceb271423dfabcc38752dc0', -- Lund University - 'openorgs____::3cff625a4370d51e08624cc586138b2f' -- IMT Atlantique + 'openorgs____::3cff625a4370d51e08624cc586138b2f', -- IMT Atlantique + 'openorgs____::c0b262bd6eab819e4c994914f9c010e2' -- National Institute of Geophysics and Volcanology ))) foo; ANALYZE TABLE TARGET.result COMPUTE STATISTICS; \ No newline at end of file From e06d962a6abca48fe967544d0a44ca324ee8501b Mon Sep 17 00:00:00 2001 From: dimitrispie Date: Thu, 15 Jun 2023 12:20:35 +0300 Subject: [PATCH 2/5] Update step15.sql --- .../dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15.sql index 7eba908fd..3724641c0 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15.sql @@ -35,8 +35,8 @@ select * from ${stats_db_name}.software_refereed union all select * from ${stats_db_name}.otherresearchproduct_refereed; -create table if not exists indi_impact_measures as -select distinct substr(id, 4) as id, measures_ids.id impactmetric, measures_ids.unit.value[0] score, +create table if not exists indi_impact_measures STORED AS PARQUET as +select substr(id, 4) as id, measures_ids.id impactmetric, measures_ids.unit.value[0] score, cast(measures_ids.unit.value[0] as decimal(6,3)) score_dec, measures_ids.unit.value[1] class from ${openaire_db_name}.result lateral view explode(measures) measures as measures_ids where measures_ids.id!='views' and measures_ids.id!='downloads'; From 4c770a5e29e8604e193bc364d19e74927b213bd1 Mon Sep 17 00:00:00 2001 From: dimitrispie Date: Thu, 15 Jun 2023 13:25:37 +0300 Subject: [PATCH 3/5] Update finalizeImpalaCluster.sh Drop views in shadow dbs before dropping the db --- .../dhp/oa/graph/stats/oozie_app/finalizeImpalaCluster.sh | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/finalizeImpalaCluster.sh b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/finalizeImpalaCluster.sh index a77b5a113..857635b6c 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/finalizeImpalaCluster.sh +++ 
b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/finalizeImpalaCluster.sh @@ -10,6 +10,12 @@ function createShadowDB() { SOURCE=$1 SHADOW=$2 + # drop views from the shadow db one at a time before the database itself is dropped + for i in `impala-shell -i impala-cluster-dn1.openaire.eu -d ${SHADOW} --delimited -q "show tables"`; + do + impala-shell -i impala-cluster-dn1.openaire.eu -d ${SHADOW} -q "drop view $i;"; + done + impala-shell -i impala-cluster-dn1.openaire.eu -q "drop database ${SHADOW} CASCADE"; impala-shell -i impala-cluster-dn1.openaire.eu -q "create database if not exists ${SHADOW}"; # impala-shell -i impala-cluster-dn1.openaire.eu -d ${SHADOW} -q "show tables" | sed "s/^/drop view if exists ${SHADOW}./" | sed "s/$/;/" | impala-shell -i impala-cluster-dn1.openaire.eu -f - From 36e0a8fec4cfcd854c4c484bbed28d21658c7a8d Mon Sep 17 00:00:00 2001 From: dimitrispie Date: Mon, 19 Jun 2023 09:44:34 +0300 Subject: [PATCH 4/5] Changes to Promotion Stats WF 1. Add new cluster host at impala-shell commands 2. Add a step for splitting monitor dbs 3. 
Update workflow.xml to include the new splitting monitor dbs step --- .../stats/oozie_app/updateProductionViews.sh | 6 +-- .../oozie_app/updateProductionViewsMonitor.sh | 38 +++++++++++++++++++ .../dhp/oa/graph/stats/oozie_app/workflow.xml | 4 +- 3 files changed, 43 insertions(+), 5 deletions(-) create mode 100644 dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/updateProductionViewsMonitor.sh diff --git a/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/updateProductionViews.sh b/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/updateProductionViews.sh index 3e510e87e..a6d7b289d 100644 --- a/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/updateProductionViews.sh +++ b/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/updateProductionViews.sh @@ -10,7 +10,7 @@ export SOURCE=$1 export PRODUCTION=$2 echo "Updating ${PRODUCTION} database" -impala-shell -q "create database if not exists ${PRODUCTION}" -impala-shell -d ${PRODUCTION} -q "show tables" --delimited | sed "s/^/drop view if exists ${PRODUCTION}./" | sed "s/$/;/" | impala-shell -c -f - -impala-shell -d ${SOURCE} -q "show tables" --delimited | sed "s/\(.*\)/create view ${PRODUCTION}.\1 as select * from ${SOURCE}.\1;/" | impala-shell -c -f - +impala-shell -i impala-cluster-dn1.openaire.eu -q "create database if not exists ${PRODUCTION}" +impala-shell -i impala-cluster-dn1.openaire.eu -d ${PRODUCTION} -q "show tables" --delimited | sed "s/^/drop view if exists ${PRODUCTION}./" | sed "s/$/;/" | impala-shell -c -f - +impala-shell -i impala-cluster-dn1.openaire.eu -d ${SOURCE} -q "show tables" --delimited | sed "s/\(.*\)/create view ${PRODUCTION}.\1 as select * from ${SOURCE}.\1;/" | impala-shell -c -f - echo "Production db ready!" 
\ No newline at end of file diff --git a/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/updateProductionViewsMonitor.sh b/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/updateProductionViewsMonitor.sh new file mode 100644 index 000000000..68844b14c --- /dev/null +++ b/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/updateProductionViewsMonitor.sh @@ -0,0 +1,38 @@ +export PYTHON_EGG_CACHE=/home/$(whoami)/.python-eggs +export link_folder=/tmp/impala-shell-python-egg-cache-$(whoami) +if ! [ -L $link_folder ] +then + rm -Rf "$link_folder" + ln -sfn ${PYTHON_EGG_CACHE}${link_folder} ${link_folder} +fi + +export SOURCE=$1 +export PRODUCTION=$2 + +echo "Updating ${PRODUCTION}_funded database" +impala-shell -i impala-cluster-dn1.openaire.eu -q "create database if not exists ${PRODUCTION}_funded" +impala-shell -i impala-cluster-dn1.openaire.eu -d ${PRODUCTION}_funded -q "show tables" --delimited | sed "s/^/drop view if exists ${PRODUCTION}_funded./" | sed "s/$/;/" | impala-shell -c -f - +impala-shell -i impala-cluster-dn1.openaire.eu -d ${SOURCE}_funded -q "show tables" --delimited | sed "s/\(.*\)/create view ${PRODUCTION}_funded.\1 as select * from ${SOURCE}_funded.\1;/" | impala-shell -c -f - +echo "Production funded db ready!" 
+ +echo "Updating ${PRODUCTION}_institutions database" +impala-shell -i impala-cluster-dn1.openaire.eu -q "create database if not exists ${PRODUCTION}_institutions" +impala-shell -i impala-cluster-dn1.openaire.eu -d ${PRODUCTION}_institutions -q "show tables" --delimited | sed "s/^/drop view if exists ${PRODUCTION}_institutions./" | sed "s/$/;/" | impala-shell -c -f - +impala-shell -i impala-cluster-dn1.openaire.eu -d ${SOURCE}_institutions -q "show tables" --delimited | sed "s/\(.*\)/create view ${PRODUCTION}_institutions.\1 as select * from ${SOURCE}_institutions.\1;/" | impala-shell -c -f - +echo "Production institutions db ready!" + +echo "Updating ${PRODUCTION}_ris_tail database" +impala-shell -i impala-cluster-dn1.openaire.eu -q "create database if not exists ${PRODUCTION}_ris_tail" +impala-shell -i impala-cluster-dn1.openaire.eu -d ${PRODUCTION}_ris_tail -q "show tables" --delimited | sed "s/^/drop view if exists ${PRODUCTION}_ris_tail./" | sed "s/$/;/" | impala-shell -c -f - +impala-shell -i impala-cluster-dn1.openaire.eu -d ${SOURCE}_ris_tail -q "show tables" --delimited | sed "s/\(.*\)/create view ${PRODUCTION}_ris_tail.\1 as select * from ${SOURCE}_ris_tail.\1;/" | impala-shell -c -f - +echo "Production RIS tail db ready!" 
+ +contexts="knowmad::other dh-ch::other enermaps::other gotriple::other neanias-atmospheric::other rural-digital-europe::other covid-19::other aurora::other neanias-space::other north-america-studies::other north-american-studies::other eutopia::other" +for i in ${contexts} +do + tmp=`echo "$i" | sed 's/'-'/'_'/g' | sed 's/'::'/'_'/g'` + impala-shell -i impala-cluster-dn1.openaire.eu -q "create database if not exists ${PRODUCTION}_${tmp}" + impala-shell -i impala-cluster-dn1.openaire.eu -d ${PRODUCTION}_${tmp} -q "show tables" --delimited | sed "s/^/drop view if exists ${PRODUCTION}_${tmp}./" | sed "s/$/;/" | impala-shell -c -f - + impala-shell -i impala-cluster-dn1.openaire.eu -d ${SOURCE}_${tmp} -q "show tables" --delimited | sed "s/\(.*\)/create view ${PRODUCTION}_${tmp}.\1 as select * from ${SOURCE}_${tmp}.\1;/" | impala-shell -c -f - + echo "Production ${tmp} db ready!" +done diff --git a/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml b/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml index 8286e5039..605c86ac9 100644 --- a/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml @@ -80,10 +80,10 @@ ${jobTracker} ${nameNode} - updateProductionViews.sh + updateProductionViewsMonitor.sh ${monitor_db_name} ${monitor_db_production_name} - updateProductionViews.sh + updateProductionViewsMonitor.sh From be2caedb04bd76a38d7a69684362c18517faf0f3 Mon Sep 17 00:00:00 2001 From: dimitrispie Date: Mon, 19 Jun 2023 12:12:17 +0300 Subject: [PATCH 5/5] Update step20-createMonitorDB_institutions.sql Add openorgs____::1624ff7c01bb641b91f4518539a0c28a Vrije Universiteit Amsterdam --- .../oozie_app/scripts/step20-createMonitorDB_institutions.sql | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) 
diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB_institutions.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB_institutions.sql index bb96d516c..442e623cd 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB_institutions.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB_institutions.sql @@ -51,7 +51,8 @@ create table TARGET.result stored as parquet as 'openorgs____::eb391317ed0dc684aa81ac16265de041', -- Universitat Rovira i Virgili 'openorgs____::66aa9fc2fceb271423dfabcc38752dc0', -- Lund University 'openorgs____::3cff625a4370d51e08624cc586138b2f', -- IMT Atlantique - 'openorgs____::c0b262bd6eab819e4c994914f9c010e2' -- National Institute of Geophysics and Volcanology + 'openorgs____::c0b262bd6eab819e4c994914f9c010e2', -- National Institute of Geophysics and Volcanology + 'openorgs____::1624ff7c01bb641b91f4518539a0c28a' -- Vrije Universiteit Amsterdam ))) foo; ANALYZE TABLE TARGET.result COMPUTE STATISTICS; \ No newline at end of file