From 23246707141163f90c53e6c4f983b000dea38c11 Mon Sep 17 00:00:00 2001 From: dimitrispie Date: Fri, 2 Jun 2023 13:34:16 +0300 Subject: [PATCH] Split Monitor DBs-Interdisciplinarity indicators - Split DBs Monitor for faster rendering of visualizations - Add interdisciplinarity indicators from result_fos --- .../oozie_app/copyDataToImpalaCluster.sh | 12 +- .../stats/oozie_app/finalizeImpalaCluster.sh | 11 ++ .../dhp/oa/graph/stats/oozie_app/monitor.sh | 80 +++++++++- .../scripts/step16-createIndicatorsTables.sql | 16 +- .../scripts/step20-createMonitorDB.sql | 137 ++++++++++-------- .../scripts/step20-createMonitorDB_RIs.sql | 15 ++ .../step20-createMonitorDB_RIs_tail.sql | 15 ++ .../scripts/step20-createMonitorDB_funded.sql | 9 ++ .../step20-createMonitorDB_institutions.sql | 56 +++++++ .../dhp/oa/graph/stats/oozie_app/workflow.xml | 30 ++-- 10 files changed, 302 insertions(+), 79 deletions(-) create mode 100644 dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB_RIs.sql create mode 100644 dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB_RIs_tail.sql create mode 100644 dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB_funded.sql create mode 100644 dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB_institutions.sql diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/copyDataToImpalaCluster.sh b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/copyDataToImpalaCluster.sh index 66783c2343..b937eea25a 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/copyDataToImpalaCluster.sh +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/copyDataToImpalaCluster.sh @@ -68,6 +68,16 @@ copydb $USAGE_STATS_DB copydb $PROD_USAGE_STATS_DB copydb $EXT_DB copydb $STATS_DB -copydb $MONITOR_DB +#copydb $MONITOR_DB copydb $OBSERVATORY_DB +copydb $MONITOR_DB'_funded' +copydb $MONITOR_DB'_institutions' +copydb $MONITOR_DB'_RIs_tail' + +contexts="knowmad::other dh-ch::other enermaps::other gotriple::other neanias-atmospheric::other rural-digital-europe::other covid-19::other aurora::other neanias-space::other north-america-studies::other north-american-studies::other eutopia::other" +for i in ${contexts} +do + tmp=`echo "$i" | sed 's/'-'/'_'/g' | sed 's/'::'/'_'/g'` + copydb ${MONITOR_DB}'_'${tmp} +done \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/finalizeImpalaCluster.sh b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/finalizeImpalaCluster.sh index 5914b95f80..a77b5a1139 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/finalizeImpalaCluster.sh +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/finalizeImpalaCluster.sh @@ -29,3 +29,14 @@ createShadowDB $STATS_DB $STATS_DB_SHADOW createShadowDB $MONITOR_DB $MONITOR_DB_SHADOW createShadowDB $OBSERVATORY_DB $OBSERVATORY_DB_SHADOW createShadowDB USAGE_STATS_DB USAGE_STATS_DB_SHADOW + +createShadowDB $MONITOR_DB'_funded' $MONITOR_DB'_funded_shadow' +createShadowDB $MONITOR_DB'_institutions' $MONITOR_DB'_institutions_shadow' +createShadowDB $MONITOR_DB'_RIs_tail' $MONITOR_DB'_RIs_tail_shadow' + +contexts="knowmad::other dh-ch::other enermaps::other gotriple::other neanias-atmospheric::other rural-digital-europe::other covid-19::other aurora::other neanias-space::other north-america-studies::other north-american-studies::other eutopia::other" +for i in ${contexts} +do + tmp=`echo "$i" | sed 's/'-'/'_'/g' | sed 's/'::'/'_'/g'` + createShadowDB ${MONITOR_DB}'_'${tmp} ${MONITOR_DB}'_'${tmp}'_shadow' +done \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/monitor.sh b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/monitor.sh index 440aac7704..08f4c9232d 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/monitor.sh +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/monitor.sh @@ -10,16 +10,88 @@ export SOURCE=$1 export TARGET=$2 export SHADOW=$3 export SCRIPT_PATH=$4 +export SCRIPT_PATH2=$5 +export SCRIPT_PATH3=$6 +export SCRIPT_PATH4=$7 +export SCRIPT_PATH5=$8 export HIVE_OPTS="-hiveconf mapred.job.queue.name=analytics -hiveconf hive.spark.client.connect.timeout=120000ms -hiveconf hive.spark.client.server.connect.timeout=300000ms -hiveconf spark.executor.memory=19166291558 -hiveconf spark.yarn.executor.memoryOverhead=3225 -hiveconf spark.driver.memory=11596411699 -hiveconf spark.yarn.driver.memoryOverhead=1228" export HADOOP_USER_NAME="oozie" -echo "Getting file from " $SCRIPT_PATH -hdfs dfs -copyToLocal $SCRIPT_PATH +echo "Getting file from " $4 +hdfs dfs -copyToLocal $4 + +echo "Getting file from " $5 +hdfs dfs -copyToLocal $5 + +echo "Getting file from " $6 +hdfs dfs -copyToLocal $6 + +echo "Getting file from " $7 +hdfs dfs -copyToLocal $7 + +echo "Getting file from " $8 +hdfs dfs -copyToLocal $8 echo "Creating monitor database" -#cat step20-createMonitorDB.sql | sed s/SOURCE/$1/g | sed s/TARGET/$2/g1 > foo -cat step20-createMonitorDB.sql | sed "s/TARGET/${TARGET}/g" | sed "s/SOURCE/${SOURCE}/g1" > foo +cat step20-createMonitorDB_funded.sql | sed "s/SOURCE/$1/g" | sed "s/TARGET/$2_funded/g1" > foo hive $HIVE_OPTS -f foo +cat step20-createMonitorDB.sql | sed "s/SOURCE/$1/g" | sed "s/TARGET/$2_funded/g1" > foo +hive $HIVE_OPTS -f foo +# +cat step20-createMonitorDB_institutions.sql | sed "s/SOURCE/$1/g" | sed "s/TARGET/$2_institutions/g1" > foo +hive $HIVE_OPTS -f foo +cat step20-createMonitorDB.sql | sed "s/SOURCE/$1/g" | sed "s/TARGET/$2_institutions/g1" > foo +hive $HIVE_OPTS -f foo + +contexts="knowmad::other dh-ch::other enermaps::other gotriple::other neanias-atmospheric::other rural-digital-europe::other covid-19::other aurora::other neanias-space::other north-america-studies::other north-american-studies::other eutopia::other" + +for i in ${contexts} +do + tmp=`echo "$i" | sed 's/'-'/'_'/g' | sed 's/'::'/'_'/g'` + tmp2=`echo "$i" |sed 's/:.*//' ` + cat step20-createMonitorDB_RIs.sql | sed "s/SOURCE/$1/g" | sed "s/TARGET/$2_$tmp/g1" | sed "s/CONTEXT/\'%$tmp2%\'/g" > foo + hive $HIVE_OPTS -f foo + cat step20-createMonitorDB.sql | sed "s/SOURCE/$1/g" | sed "s/TARGET/$2_$tmp/g1" > foo + hive $HIVE_OPTS -f foo +done + + +cat step20-createMonitorDB_RIs_tail.sql | sed "s/SOURCE/$1/g" | sed "s/TARGET/$2_RIs_tail/g1" | sed "s/CONTEXTS/\"'knowmad::other','dh-ch::other', 'enermaps::other', 'gotriple::other', 'neanias-atmospheric::other', 'rural-digital-europe::other', 'covid-19::other', 'aurora::other', 'neanias-space::other', 'north-america-studies::other', 'north-american-studies::other', 'eutopia::other'\"/g" > foo +hive $HIVE_OPTS -f foo +cat step20-createMonitorDB.sql | sed "s/SOURCE/$1/g" | sed "s/TARGET/$2_RIs_tail/g1" > foo +hive $HIVE_OPTS -f foo + echo "Hive shell finished" +echo "Updating shadow monitor funded database" +hive -e "drop database if exists ${SHADOW}_funded cascade" +hive -e "create database if not exists ${SHADOW}_funded" +hive $HIVE_OPTS --database ${2}_funded -e "show tables" | grep -v WARN | sed "s/\(.*\)/create view ${SHADOW}_funded.\1 as select * from ${2}_funded.\1;/" > foo +hive -f foo +echo "Updated shadow monitor funded database" + +echo "Updating shadow monitor insitutions database" +hive -e "drop database if exists ${SHADOW}_institutions cascade" +hive -e "create database if not exists ${SHADOW}_institutions" +hive $HIVE_OPTS --database ${2}_institutions -e "show tables" | grep -v WARN | sed "s/\(.*\)/create view ${SHADOW}_institutions.\1 as select * from ${2}_institutions.\1;/" > foo +hive -f foo +echo "Shadow db monitor insitutions ready!" + +echo "Updating shadow monitor RIs database" +for i in $contexts +do + tmp=`echo "$i" | sed 's/'-'/'_'/g' | sed 's/'::'/'_'/g'` + hive -e "drop database if exists ${SHADOW}_${tmp} cascade" + hive -e "create database if not exists ${SHADOW}_${tmp}" + hive $HIVE_OPTS --database ${2}_${tmp} -e "show tables" | grep -v WARN | sed "s/\(.*\)/create view ${SHADOW}_${tmp}.\1 as select * from ${2}_${tmp}.\1;/" > foo + hive -f foo +done +echo "Shadow db monitor RIs ready!" + +echo "Updating shadow monitor RIs tail database" +hive -e "drop database if exists ${SHADOW}_ris_tail cascade" +hive -e "create database if not exists ${SHADOW}_ris_tail" +hive $HIVE_OPTS --database ${2}_ris_tail -e "show tables" | grep -v WARN | sed "s/\(.*\)/create view ${SHADOW}_ris_tail.\1 as select * from ${2}_ris_tail.\1;/" > foo +hive -f foo +echo "Shadow db monitor RIs tail ready!" diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql index ef573916f4..e358e0ef07 100755 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql @@ -779,4 +779,18 @@ cast(measures_ids.unit.value[0] as decimal(6,3)) score_dec, measures_ids.unit.va from result lateral view explode(measures) measures as measures_ids where measures_ids.id!='views' and measures_ids.id!='downloads'; -ANALYZE TABLE indi_impact_measures COMPUTE STATISTICS; \ No newline at end of file +ANALYZE TABLE indi_impact_measures COMPUTE STATISTICS; + +CREATE TEMPORARY TABLE pub_fos_totals as +select rf.id, count(distinct lvl3) totals from result_fos rf +group by rf.id; + +create table if not exists indi_pub_interdisciplinarity as +select distinct p.id, coalesce(indi_pub_is_interdisciplinary, 0) +as indi_pub_is_interdisciplinary +from pub_fos_totals p +left outer join ( +select pub_fos_totals.id, 1 as indi_pub_is_interdisciplinary from pub_fos_totals +where totals>10) tmp on p.id=tmp.id; + +ANALYZE TABLE indi_pub_interdisciplinarity COMPUTE STATISTICS; \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql index 86b5c7ca11..9744d5aae2 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql @@ -1,5 +1,78 @@ -drop database if exists TARGET cascade; -create database if not exists TARGET; +--drop database if exists TARGET cascade; +--create database if not exists TARGET; +-- +--create view if not exists TARGET.category as select * from SOURCE.category; +--create view if not exists TARGET.concept as select * from SOURCE.concept; +--create view if not exists TARGET.context as select * from SOURCE.context; +--create view if not exists TARGET.country as select * from SOURCE.country; +--create view if not exists TARGET.countrygdp as select * from SOURCE.countrygdp; +--create view if not exists TARGET.creation_date as select * from SOURCE.creation_date; +--create view if not exists TARGET.funder as select * from SOURCE.funder; +--create view if not exists TARGET.fundref as select * from SOURCE.fundref; +--create view if not exists TARGET.rndexpenditure as select * from SOURCE.rndexpediture; +--create view if not exists TARGET.rndgdpexpenditure as select * from SOURCE.rndgdpexpenditure; +--create view if not exists TARGET.doctoratestudents as select * from SOURCE.doctoratestudents; +--create view if not exists TARGET.totalresearchers as select * from SOURCE.totalresearchers; +--create view if not exists TARGET.totalresearchersft as select * from SOURCE.totalresearchersft; +--create view if not exists TARGET.hrrst as select * from SOURCE.hrrst; +-- +--create table TARGET.result stored as parquet as +-- select distinct * from ( +-- select * from SOURCE.result r where exists (select 1 from SOURCE.result_projects rp join SOURCE.project p on rp.project=p.id where rp.id=r.id) +-- union all +-- select * from SOURCE.result r where exists (select 1 from SOURCE.result_concepts rc where rc.id=r.id) +-- union all +-- select * from SOURCE.result r where exists (select 1 from SOURCE.result_organization ro where ro.id=r.id and ro.organization in ( +-- 'openorgs____::b84450f9864182c67b8611b5593f4250', --"Athena Research and Innovation Center In Information Communication & Knowledge Technologies', --ARC" +-- 'openorgs____::d41cf6bd4ab1b1362a44397e0b95c975', --National Research Council +-- 'openorgs____::d2a09b9d5eabb10c95f9470e172d05d2', --??? Not exists ?? +-- 'openorgs____::d169c7407dd417152596908d48c11460', --Masaryk University +-- 'openorgs____::1ec924b1759bb16d0a02f2dad8689b21', --University of Belgrade +-- 'openorgs____::0ae431b820e4c33db8967fbb2b919150', --University of Helsinki +-- 'openorgs____::759d59f05d77188faee99b7493b46805', --University of Minho +-- 'openorgs____::cad284878801b9465fa51a95b1d779db', --Universidad Politécnica de Madrid +-- 'openorgs____::eadc8da90a546e98c03f896661a2e4d4', --University of Göttingen +-- 'openorgs____::c0286313e36479eff8676dba9b724b40', --National and Kapodistrian University of Athens +-- -- 'openorgs____::c80a8243a5e5c620d7931c88d93bf17a', --Université Paris Diderot +-- 'openorgs____::c08634f0a6b0081c3dc6e6c93a4314f3', --Bielefeld University +-- 'openorgs____::6fc85e4a8f7ecaf4b0c738d010e967ea', --University of Southern Denmark +-- 'openorgs____::3d6122f87f9a97a99d8f6e3d73313720', --Humboldt-Universität zu Berlin +-- 'openorgs____::16720ada63d0fa8ca41601feae7d1aa5', --TU Darmstadt +-- 'openorgs____::ccc0a066b56d2cfaf90c2ae369df16f5', --KU Leuven +-- 'openorgs____::4c6f119632adf789746f0a057ed73e90', --University of the Western Cape +-- 'openorgs____::ec3665affa01aeafa28b7852c4176dbd', --Rudjer Boskovic Institute +-- 'openorgs____::5f31346d444a7f06a28c880fb170b0f6', --Ghent University +-- 'openorgs____::2dbe47117fd5409f9c61620813456632', --University of Luxembourg +-- 'openorgs____::6445d7758d3a40c4d997953b6632a368', --National Institute of Informatics (NII) +-- 'openorgs____::b77c01aa15de3675da34277d48de2ec1', -- Valencia Catholic University Saint Vincent Martyr +-- 'openorgs____::7fe2f66cdc43983c6b24816bfe9cf6a0', -- Unviersity of Warsaw +-- 'openorgs____::15e7921fc50d9aa1229a82a84429419e', -- University Of Thessaly +-- 'openorgs____::11f7919dadc8f8a7251af54bba60c956', -- Technical University of Crete +-- 'openorgs____::84f0c5f5dbb6daf42748485924efde4b', -- University of Piraeus +-- 'openorgs____::4ac562f0376fce3539504567649cb373', -- University of Patras +-- 'openorgs____::3e8d1f8c3f6cd7f418b09f1f58b4873b', -- Aristotle University of Thessaloniki +-- 'openorgs____::3fcef6e1c469c10f2a84b281372c9814', -- World Bank +-- 'openorgs____::1698a2eb1885ef8adb5a4a969e745ad3', -- École des Ponts ParisTech +-- 'openorgs____::e15adb13c4dadd49de4d35c39b5da93a', -- Nanyang Technological University +-- 'openorgs____::4b34103bde246228fcd837f5f1bf4212', -- Autonomous University of Barcelona +-- 'openorgs____::72ec75fcfc4e0df1a76dc4c49007fceb', -- McMaster University +-- 'openorgs____::51c7fc556e46381734a25a6fbc3fd398', -- University of Modena and Reggio Emilia +-- 'openorgs____::235d7f9ad18ecd7e6dc62ea4990cb9db', -- Bilkent University +-- 'openorgs____::31f2fa9e05b49d4cf40a19c3fed8eb06', -- Saints Cyril and Methodius University of Skopje +-- 'openorgs____::db7686f30f22cbe73a4fde872ce812a6', -- University of Milan +-- 'openorgs____::b8b8ca674452579f3f593d9f5e557483', -- University College Cork +-- 'openorgs____::38d7097854736583dde879d12dacafca' -- Brown University +-- 'openorgs____::57784c9e047e826fefdb1ef816120d92', --Arts et Métiers ParisTech +-- 'openorgs____::2530baca8a15936ba2e3297f2bce2e7e', -- University of Cape Town +-- 'openorgs____::d11f981828c485cd23d93f7f24f24db1', -- Technological University Dublin +-- 'openorgs____::5e6bf8962665cdd040341171e5c631d8', -- Delft University of Technology +-- 'openorgs____::846cb428d3f52a445f7275561a7beb5d', -- University of Manitoba +-- 'openorgs____::eb391317ed0dc684aa81ac16265de041', -- Universitat Rovira i Virgili +-- 'openorgs____::66aa9fc2fceb271423dfabcc38752dc0', -- Lund University +-- 'openorgs____::3cff625a4370d51e08624cc586138b2f' -- IMT Atlantique +-- ) )) foo; +-- +--ANALYZE TABLE TARGET.result COMPUTE STATISTICS; create view if not exists TARGET.category as select * from SOURCE.category; create view if not exists TARGET.concept as select * from SOURCE.concept; @@ -16,64 +89,6 @@ create view if not exists TARGET.totalresearchers as select * from SOURCE.totalr create view if not exists TARGET.totalresearchersft as select * from SOURCE.totalresearchersft; create view if not exists TARGET.hrrst as select * from SOURCE.hrrst; -create table TARGET.result stored as parquet as - select distinct * from ( - select * from SOURCE.result r where exists (select 1 from SOURCE.result_projects rp join SOURCE.project p on rp.project=p.id where rp.id=r.id) - union all - select * from SOURCE.result r where exists (select 1 from SOURCE.result_concepts rc where rc.id=r.id) - union all - select * from SOURCE.result r where exists (select 1 from SOURCE.result_organization ro where ro.id=r.id and ro.organization in ( - 'openorgs____::b84450f9864182c67b8611b5593f4250', --"Athena Research and Innovation Center In Information Communication & Knowledge Technologies', --ARC" - 'openorgs____::d41cf6bd4ab1b1362a44397e0b95c975', --National Research Council - 'openorgs____::d2a09b9d5eabb10c95f9470e172d05d2', --??? Not exists ?? - 'openorgs____::d169c7407dd417152596908d48c11460', --Masaryk University - 'openorgs____::1ec924b1759bb16d0a02f2dad8689b21', --University of Belgrade - 'openorgs____::0ae431b820e4c33db8967fbb2b919150', --University of Helsinki - 'openorgs____::759d59f05d77188faee99b7493b46805', --University of Minho - 'openorgs____::cad284878801b9465fa51a95b1d779db', --Universidad Politécnica de Madrid - 'openorgs____::eadc8da90a546e98c03f896661a2e4d4', --University of Göttingen - 'openorgs____::c0286313e36479eff8676dba9b724b40', --National and Kapodistrian University of Athens - -- 'openorgs____::c80a8243a5e5c620d7931c88d93bf17a', --Université Paris Diderot - 'openorgs____::c08634f0a6b0081c3dc6e6c93a4314f3', --Bielefeld University - 'openorgs____::6fc85e4a8f7ecaf4b0c738d010e967ea', --University of Southern Denmark - 'openorgs____::3d6122f87f9a97a99d8f6e3d73313720', --Humboldt-Universität zu Berlin - 'openorgs____::16720ada63d0fa8ca41601feae7d1aa5', --TU Darmstadt - 'openorgs____::ccc0a066b56d2cfaf90c2ae369df16f5', --KU Leuven - 'openorgs____::4c6f119632adf789746f0a057ed73e90', --University of the Western Cape - 'openorgs____::ec3665affa01aeafa28b7852c4176dbd', --Rudjer Boskovic Institute - 'openorgs____::5f31346d444a7f06a28c880fb170b0f6', --Ghent University - 'openorgs____::2dbe47117fd5409f9c61620813456632', --University of Luxembourg - 'openorgs____::6445d7758d3a40c4d997953b6632a368', --National Institute of Informatics (NII) - 'openorgs____::b77c01aa15de3675da34277d48de2ec1', -- Valencia Catholic University Saint Vincent Martyr - 'openorgs____::7fe2f66cdc43983c6b24816bfe9cf6a0', -- Unviersity of Warsaw - 'openorgs____::15e7921fc50d9aa1229a82a84429419e', -- University Of Thessaly - 'openorgs____::11f7919dadc8f8a7251af54bba60c956', -- Technical University of Crete - 'openorgs____::84f0c5f5dbb6daf42748485924efde4b', -- University of Piraeus - 'openorgs____::4ac562f0376fce3539504567649cb373', -- University of Patras - 'openorgs____::3e8d1f8c3f6cd7f418b09f1f58b4873b', -- Aristotle University of Thessaloniki - 'openorgs____::3fcef6e1c469c10f2a84b281372c9814', -- World Bank - 'openorgs____::1698a2eb1885ef8adb5a4a969e745ad3', -- École des Ponts ParisTech - 'openorgs____::e15adb13c4dadd49de4d35c39b5da93a', -- Nanyang Technological University - 'openorgs____::4b34103bde246228fcd837f5f1bf4212', -- Autonomous University of Barcelona - 'openorgs____::72ec75fcfc4e0df1a76dc4c49007fceb', -- McMaster University - 'openorgs____::51c7fc556e46381734a25a6fbc3fd398', -- University of Modena and Reggio Emilia - 'openorgs____::235d7f9ad18ecd7e6dc62ea4990cb9db', -- Bilkent University - 'openorgs____::31f2fa9e05b49d4cf40a19c3fed8eb06', -- Saints Cyril and Methodius University of Skopje - 'openorgs____::db7686f30f22cbe73a4fde872ce812a6', -- University of Milan - 'openorgs____::b8b8ca674452579f3f593d9f5e557483', -- University College Cork - 'openorgs____::38d7097854736583dde879d12dacafca' -- Brown University - 'openorgs____::57784c9e047e826fefdb1ef816120d92', --Arts et Métiers ParisTech - 'openorgs____::2530baca8a15936ba2e3297f2bce2e7e', -- University of Cape Town - 'openorgs____::d11f981828c485cd23d93f7f24f24db1', -- Technological University Dublin - 'openorgs____::5e6bf8962665cdd040341171e5c631d8', -- Delft University of Technology - 'openorgs____::846cb428d3f52a445f7275561a7beb5d', -- University of Manitoba - 'openorgs____::eb391317ed0dc684aa81ac16265de041', -- Universitat Rovira i Virgili - 'openorgs____::66aa9fc2fceb271423dfabcc38752dc0', -- Lund University - 'openorgs____::3cff625a4370d51e08624cc586138b2f' -- IMT Atlantique - ) )) foo; - -ANALYZE TABLE TARGET.result COMPUTE STATISTICS; - create table TARGET.result_citations stored as parquet as select * from SOURCE.result_citations orig where exists (select 1 from TARGET.result r where r.id=orig.id); ANALYZE TABLE TARGET.result_citations COMPUTE STATISTICS; @@ -251,3 +266,5 @@ create table TARGET.indi_result_with_pid stored as parquet as select * from SOUR ANALYZE TABLE TARGET.indi_result_with_pid COMPUTE STATISTICS; create table TARGET.indi_impact_measures stored as parquet as select * from SOURCE.indi_impact_measures orig where exists (select 1 from TARGET.result r where r.id=orig.id); ANALYZE TABLE TARGET.indi_impact_measures COMPUTE STATISTICS; +create table TARGET.indi_pub_interdisciplinarity stored as parquet as select * from SOURCE.indi_pub_interdisciplinarity orig where exists (select 1 from TARGET.result r where r.id=orig.id); +ANALYZE TABLE TARGET.indi_pub_interdisciplinarity COMPUTE STATISTICS; diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB_RIs.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB_RIs.sql new file mode 100644 index 0000000000..92b40405d4 --- /dev/null +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB_RIs.sql @@ -0,0 +1,15 @@ +drop database if exists TARGET cascade; +create database if not exists TARGET; + +create table TARGET.result stored as parquet as + select distinct * from ( + select * from SOURCE.result r where exists + (select 1 + from SOURCE.result_concepts rc + join SOURCE.concept conc on conc.id=rc.concept + join SOURCE.category cat on cat.id=conc.category + join SOURCE.context cont on cont.id=cat.context +-- join SOURCE.result + where rc.id=r.id and conc.category like CONTEXT) +) foo; +ANALYZE TABLE TARGET.result COMPUTE STATISTICS; \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB_RIs_tail.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB_RIs_tail.sql new file mode 100644 index 0000000000..ef6d08d797 --- /dev/null +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB_RIs_tail.sql @@ -0,0 +1,15 @@ +drop database if exists TARGET cascade; +create database if not exists TARGET; + +create table TARGET.result stored as parquet as + select distinct * from ( + select * from SOURCE.result r where exists + (select 1 + from SOURCE.result_concepts rc + join SOURCE.concept conc on conc.id=rc.concept + join SOURCE.category cat on cat.id=conc.category + join SOURCE.context cont on cont.id=cat.context +-- join SOURCE.result + where rc.id=r.id and conc.category not in (CONTEXTS)) +) foo; +ANALYZE TABLE TARGET.result COMPUTE STATISTICS; \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB_funded.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB_funded.sql new file mode 100644 index 0000000000..8d8739c743 --- /dev/null +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB_funded.sql @@ -0,0 +1,9 @@ +drop database if exists TARGET cascade; +create database if not exists TARGET; + +create table TARGET.result stored as parquet as + select distinct * from ( + select * from SOURCE.result r where exists (select 1 from SOURCE.result_projects rp join SOURCE.project p on rp.project=p.id where rp.id=r.id) + ) foo; + +ANALYZE TABLE TARGET.result COMPUTE STATISTICS; \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB_institutions.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB_institutions.sql new file mode 100644 index 0000000000..121ee6e7f4 --- /dev/null +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB_institutions.sql @@ -0,0 +1,56 @@ +drop database if exists TARGET cascade; +create database if not exists TARGET; + +create table TARGET.result stored as parquet as + select distinct * from ( + select * from SOURCE.result r where exists (select 1 from SOURCE.result_organization ro where ro.id=r.id and ro.organization in ( + 'openorgs____::b84450f9864182c67b8611b5593f4250', --"Athena Research and Innovation Center In Information Communication & Knowledge Technologies', --ARC" + 'openorgs____::d41cf6bd4ab1b1362a44397e0b95c975', --National Research Council + 'openorgs____::d2a09b9d5eabb10c95f9470e172d05d2', --??? Not exists ?? + 'openorgs____::d169c7407dd417152596908d48c11460', --Masaryk University + 'openorgs____::1ec924b1759bb16d0a02f2dad8689b21', --University of Belgrade + 'openorgs____::0ae431b820e4c33db8967fbb2b919150', --University of Helsinki + 'openorgs____::759d59f05d77188faee99b7493b46805', --University of Minho + 'openorgs____::cad284878801b9465fa51a95b1d779db', --Universidad Politécnica de Madrid + 'openorgs____::eadc8da90a546e98c03f896661a2e4d4', --University of Göttingen + 'openorgs____::c0286313e36479eff8676dba9b724b40', --National and Kapodistrian University of Athens + -- 'openorgs____::c80a8243a5e5c620d7931c88d93bf17a', --Université Paris Diderot + 'openorgs____::c08634f0a6b0081c3dc6e6c93a4314f3', --Bielefeld University + 'openorgs____::6fc85e4a8f7ecaf4b0c738d010e967ea', --University of Southern Denmark + 'openorgs____::3d6122f87f9a97a99d8f6e3d73313720', --Humboldt-Universität zu Berlin + 'openorgs____::16720ada63d0fa8ca41601feae7d1aa5', --TU Darmstadt + 'openorgs____::ccc0a066b56d2cfaf90c2ae369df16f5', --KU Leuven + 'openorgs____::4c6f119632adf789746f0a057ed73e90', --University of the Western Cape + 'openorgs____::ec3665affa01aeafa28b7852c4176dbd', --Rudjer Boskovic Institute + 'openorgs____::5f31346d444a7f06a28c880fb170b0f6', --Ghent University + 'openorgs____::2dbe47117fd5409f9c61620813456632', --University of Luxembourg + 'openorgs____::6445d7758d3a40c4d997953b6632a368', --National Institute of Informatics (NII) + 'openorgs____::b77c01aa15de3675da34277d48de2ec1', -- Valencia Catholic University Saint Vincent Martyr + 'openorgs____::7fe2f66cdc43983c6b24816bfe9cf6a0', -- Unviersity of Warsaw + 'openorgs____::15e7921fc50d9aa1229a82a84429419e', -- University Of Thessaly + 'openorgs____::11f7919dadc8f8a7251af54bba60c956', -- Technical University of Crete + 'openorgs____::84f0c5f5dbb6daf42748485924efde4b', -- University of Piraeus + 'openorgs____::4ac562f0376fce3539504567649cb373', -- University of Patras + 'openorgs____::3e8d1f8c3f6cd7f418b09f1f58b4873b', -- Aristotle University of Thessaloniki + 'openorgs____::3fcef6e1c469c10f2a84b281372c9814', -- World Bank + 'openorgs____::1698a2eb1885ef8adb5a4a969e745ad3', -- École des Ponts ParisTech + 'openorgs____::e15adb13c4dadd49de4d35c39b5da93a', -- Nanyang Technological University + 'openorgs____::4b34103bde246228fcd837f5f1bf4212', -- Autonomous University of Barcelona + 'openorgs____::72ec75fcfc4e0df1a76dc4c49007fceb', -- McMaster University + 'openorgs____::51c7fc556e46381734a25a6fbc3fd398', -- University of Modena and Reggio Emilia + 'openorgs____::235d7f9ad18ecd7e6dc62ea4990cb9db', -- Bilkent University + 'openorgs____::31f2fa9e05b49d4cf40a19c3fed8eb06', -- Saints Cyril and Methodius University of Skopje + 'openorgs____::db7686f30f22cbe73a4fde872ce812a6', -- University of Milan + 'openorgs____::b8b8ca674452579f3f593d9f5e557483', -- University College Cork + 'openorgs____::38d7097854736583dde879d12dacafca' -- Brown University + 'openorgs____::57784c9e047e826fefdb1ef816120d92', --Arts et Métiers ParisTech + 'openorgs____::2530baca8a15936ba2e3297f2bce2e7e', -- University of Cape Town + 'openorgs____::d11f981828c485cd23d93f7f24f24db1', -- Technological University Dublin + 'openorgs____::5e6bf8962665cdd040341171e5c631d8', -- Delft University of Technology + 'openorgs____::846cb428d3f52a445f7275561a7beb5d', -- University of Manitoba + 'openorgs____::eb391317ed0dc684aa81ac16265de041', -- Universitat Rovira i Virgili + 'openorgs____::66aa9fc2fceb271423dfabcc38752dc0', -- Lund University + 'openorgs____::3cff625a4370d51e08624cc586138b2f' -- IMT Atlantique + ))) foo; + +ANALYZE TABLE TARGET.result COMPUTE STATISTICS; \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml index 68ef4595ef..2ab50fb29f 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml @@ -374,25 +374,29 @@ ${monitor_db_name} ${monitor_db_shadow_name} ${wf:appPath()}/scripts/step20-createMonitorDB.sql + ${wf:appPath()}/scripts/step20-createMonitorDB_funded.sql + ${wf:appPath()}/scripts/step20-createMonitorDB_institutions.sql + ${wf:appPath()}/scripts/step20-createMonitorDB_RIs.sql + ${wf:appPath()}/scripts/step20-createMonitorDB_RIs_tail.sql monitor.sh - - - - - - - ${jobTracker} - ${nameNode} - monitor-post.sh - ${monitor_db_name} - ${monitor_db_shadow_name} - monitor-post.sh - + + + + + + + + + + + + + ${jobTracker}