From d6102dd576c78b1f823262344bdb520493184d4f Mon Sep 17 00:00:00 2001 From: dimitrispie Date: Thu, 25 May 2023 14:52:34 +0300 Subject: [PATCH 1/4] Update step16-createIndicatorsTables.sql - Add org names to indi_project_collab_org - Add indi_pub_bronze_oa - Changes to indi_pub_hybrid_oa_with_cc --- .../scripts/step16-createIndicatorsTables.sql | 51 ++++++++++++++++--- 1 file changed, 45 insertions(+), 6 deletions(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql index 4fd941e5d..7e560684b 100755 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql @@ -119,12 +119,16 @@ drop table tmp purge; ANALYZE TABLE indi_result_org_country_collab COMPUTE STATISTICS; +create TEMPORARY TABLE tmp AS +select o.id organization, o.name, ro.project as project from organization o + join organization_projects ro on o.id=ro.id; + create table if not exists indi_project_collab_org stored as parquet as -select o1.id org1,o2.id org2, count(distinct o1.project) as collaborations -from organization_projects as o1 - join organization_projects as o2 on o1.project=o2.project -where o1.id!=o2.id -group by o1.id, o2.id; +select o1.organization org1,o1.name orgname1, o2.organization org2, o2.name orgname2, count(distinct o1.project) as collaborations +from tmp as o1 + join tmp as o2 on o1.project=o2.project +where o1.organization<>o2.organization and o1.name<>o2.name +group by o1.name,o2.name, o1.organization, o2.organization; ANALYZE TABLE indi_project_collab_org COMPUTE STATISTICS; @@ -245,10 +249,45 @@ FROM publication_datasources pd JOIN issn on
issn.id=pd.datasource JOIN hybrid_oa ON issn.issn = hybrid_oa.issn JOIN indi_result_has_cc_licence cc on pd.id=cc.id - where cc.has_cc_license=1) tmp on pd.id=tmp.id; + JOIN indi_pub_gold_oa ga on pd.id=ga.id + where cc.has_cc_license=1 and ga.is_gold=0) tmp on pd.id=tmp.id; ANALYZE TABLE indi_pub_hybrid_oa_with_cc COMPUTE STATISTICS; + create table if not exists indi_pub_bronze_oa stored as parquet as + WITH hybrid_oa AS ( + SELECT issn_l, journal_is_in_doaj, journal_is_oa, issn_print as issn + FROM STATS_EXT.plan_s_jn + WHERE issn_print != "" + UNION ALL + SELECT issn_l, journal_is_in_doaj, journal_is_oa, issn_online as issn + FROM STATS_EXT.plan_s_jn + WHERE issn_online != "" and (journal_is_in_doaj = FALSE OR journal_is_oa = FALSE)), + issn AS ( + SELECT * + FROM ( + SELECT id, issn_printed as issn + FROM datasource + WHERE issn_printed IS NOT NULL + UNION ALL + SELECT id,issn_online as issn + FROM datasource + WHERE issn_online IS NOT NULL ) as issn + WHERE LENGTH(issn) > 7) +SELECT DISTINCT pd.id, coalesce(is_bronze_oa, 0) as is_bronze_oa +FROM publication_datasources pd + LEFT OUTER JOIN ( + SELECT pd.id, 1 as is_bronze_oa from publication_datasources pd + JOIN datasource d on d.id=pd.datasource + JOIN issn on issn.id=pd.datasource + JOIN hybrid_oa ON issn.issn = hybrid_oa.issn + JOIN indi_result_has_cc_licence cc on pd.id=cc.id + JOIN indi_pub_gold_oa ga on pd.id=ga.id + JOIN indi_pub_hybrid_oa_with_cc hy on hy.id=pd.id + where cc.has_cc_license=0 and ga.is_gold=0 and hy.is_hybrid_oa=0) tmp on pd.id=tmp.id; + +ANALYZE TABLE indi_pub_bronze_oa COMPUTE STATISTICS; + create table if not exists indi_pub_downloads stored as parquet as SELECT result_id, sum(downloads) no_downloads from openaire_prod_usage_stats.usage_stats join publication on result_id=id From ebe586b1d14a8f49c01354fe09b496e7cb206c44 Mon Sep 17 00:00:00 2001 From: dimitrispie Date: Fri, 26 May 2023 10:25:28 +0300 Subject: [PATCH 2/4] Impact indicators/Unpaywall - Added Impact indicators - Added
unpaywall open access colours --- .../oa/graph/stats/oozie_app/scripts/step14.sql | 6 +++++- .../scripts/step16-createIndicatorsTables.sql | 8 ++++++++ .../oozie_app/scripts/step20-createMonitorDB.sql | 16 +++++++++++++--- 3 files changed, 26 insertions(+), 4 deletions(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step14.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step14.sql index 47a6f84c2..dc9e6c1f9 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step14.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step14.sql @@ -46,4 +46,8 @@ FROM ( LEFT OUTER JOIN ( SELECT substr(d.id, 4) id from ${openaire_db_name}.datasource d - WHERE d.datainfo.deletedbyinference=false and d.datainfo.invisible = FALSE) d on o.datasource = d.id; \ No newline at end of file + WHERE d.datainfo.deletedbyinference=false and d.datainfo.invisible = FALSE) d on o.datasource = d.id; + +CREATE TABLE IF NOT EXISTS ${stats_db_name}.result_accessroute STORED AS PARQUET as +select distinct substr(id,4) as id, accessroute from ${openaire_db_name}.result +lateral view explode (instance.accessright.openaccessroute) openaccessroute as accessroute; diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql index 7e560684b..ef573916f 100755 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql @@ -772,3 +772,11 @@ from result p on
p.id= tmp.id; ANALYZE TABLE indi_result_with_pid COMPUTE STATISTICS; + +create table if not exists indi_impact_measures as +select distinct substr(id, 4) as id, measures_ids.id impactmetric, measures_ids.unit.value[0] score, +cast(measures_ids.unit.value[0] as decimal(6,3)) score_dec, measures_ids.unit.value[1] class +from result lateral view explode(measures) measures as measures_ids +where measures_ids.id!='views' and measures_ids.id!='downloads'; + +ANALYZE TABLE indi_impact_measures COMPUTE STATISTICS; \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql index bc72b6c15..86b5c7ca1 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql @@ -64,9 +64,12 @@ create table TARGET.result stored as parquet as 'openorgs____::38d7097854736583dde879d12dacafca' -- Brown University 'openorgs____::57784c9e047e826fefdb1ef816120d92', --Arts et Métiers ParisTech 'openorgs____::2530baca8a15936ba2e3297f2bce2e7e', -- University of Cape Town - 'openorgs____::d11f981828c485cd23d93f7f24f24db1', -- Technological University Dublin - 'openorgs____::5e6bf8962665cdd040341171e5c631d8', -- Delft University of Technology - 'openorgs____::846cb428d3f52a445f7275561a7beb5d' -- University of Manitoba + 'openorgs____::d11f981828c485cd23d93f7f24f24db1', -- Technological University Dublin + 'openorgs____::5e6bf8962665cdd040341171e5c631d8', -- Delft University of Technology + 'openorgs____::846cb428d3f52a445f7275561a7beb5d', -- University of Manitoba + 'openorgs____::eb391317ed0dc684aa81ac16265de041', -- Universitat Rovira i Virgili +
'openorgs____::66aa9fc2fceb271423dfabcc38752dc0', -- Lund University + 'openorgs____::3cff625a4370d51e08624cc586138b2f' -- IMT Atlantique ) )) foo; ANALYZE TABLE TARGET.result COMPUTE STATISTICS; @@ -140,6 +143,9 @@ ANALYZE TABLE TARGET.result_topics COMPUTE STATISTICS; create table TARGET.result_fos stored as parquet as select * from SOURCE.result_fos orig where exists (select 1 from TARGET.result r where r.id=orig.id); ANALYZE TABLE TARGET.result_fos COMPUTE STATISTICS; +create table TARGET.result_accessroute stored as parquet as select * from SOURCE.result_accessroute orig where exists (select 1 from TARGET.result r where r.id=orig.id); +ANALYZE TABLE TARGET.result_accessroute COMPUTE STATISTICS; + create view TARGET.foo1 as select * from SOURCE.result_result rr where rr.source in (select id from TARGET.result); create view TARGET.foo2 as select * from SOURCE.result_result rr where rr.target in (select id from TARGET.result); create table TARGET.result_result STORED AS PARQUET as select distinct * from (select * from TARGET.foo1 union all select * from TARGET.foo2) foufou; @@ -213,6 +219,8 @@ ANALYZE TABLE TARGET.indi_result_no_of_copies COMPUTE STATISTICS; ---- Sprint 6 ---- create table TARGET.indi_pub_hybrid_oa_with_cc stored as parquet as select * from SOURCE.indi_pub_hybrid_oa_with_cc orig where exists (select 1 from TARGET.result r where r.id=orig.id); ANALYZE TABLE TARGET.indi_pub_hybrid_oa_with_cc COMPUTE STATISTICS; +create table TARGET.indi_pub_bronze_oa stored as parquet as select * from SOURCE.indi_pub_bronze_oa orig where exists (select 1 from TARGET.result r where r.id=orig.id); +ANALYZE TABLE TARGET.indi_pub_bronze_oa COMPUTE STATISTICS; create table TARGET.indi_pub_downloads stored as parquet as select * from SOURCE.indi_pub_downloads orig where exists (select 1 from TARGET.result r where r.id=orig.result_id); ANALYZE TABLE TARGET.indi_pub_downloads COMPUTE STATISTICS; create table TARGET.indi_pub_downloads_datasource stored as parquet as select 
* from SOURCE.indi_pub_downloads_datasource orig where exists (select 1 from TARGET.result r where r.id=orig.result_id); @@ -241,3 +249,5 @@ create table TARGET.indi_pub_in_subscribed stored as parquet as select * from SO ANALYZE TABLE TARGET.indi_pub_in_subscribed COMPUTE STATISTICS; create table TARGET.indi_result_with_pid stored as parquet as select * from SOURCE.indi_result_with_pid orig where exists (select 1 from TARGET.result r where r.id=orig.id); ANALYZE TABLE TARGET.indi_result_with_pid COMPUTE STATISTICS; +create table TARGET.indi_impact_measures stored as parquet as select * from SOURCE.indi_impact_measures orig where exists (select 1 from TARGET.result r where r.id=orig.id); +ANALYZE TABLE TARGET.indi_impact_measures COMPUTE STATISTICS; From 23246707141163f90c53e6c4f983b000dea38c11 Mon Sep 17 00:00:00 2001 From: dimitrispie Date: Fri, 2 Jun 2023 13:34:16 +0300 Subject: [PATCH 3/4] Split Monitor DBs-Interdisciplinarity indicators - Split DBs Monitor for faster rendering of visualizations - Add interdisciplinarity indicators from result_fos --- .../oozie_app/copyDataToImpalaCluster.sh | 12 +- .../stats/oozie_app/finalizeImpalaCluster.sh | 11 ++ .../dhp/oa/graph/stats/oozie_app/monitor.sh | 80 +++++++++- .../scripts/step16-createIndicatorsTables.sql | 16 +- .../scripts/step20-createMonitorDB.sql | 137 ++++++++++-------- .../scripts/step20-createMonitorDB_RIs.sql | 15 ++ .../step20-createMonitorDB_RIs_tail.sql | 15 ++ .../scripts/step20-createMonitorDB_funded.sql | 9 ++ .../step20-createMonitorDB_institutions.sql | 56 +++++++ .../dhp/oa/graph/stats/oozie_app/workflow.xml | 30 ++-- 10 files changed, 302 insertions(+), 79 deletions(-) create mode 100644 dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB_RIs.sql create mode 100644 dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB_RIs_tail.sql create mode 100644 
dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB_funded.sql create mode 100644 dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB_institutions.sql diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/copyDataToImpalaCluster.sh b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/copyDataToImpalaCluster.sh index 66783c234..b937eea25 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/copyDataToImpalaCluster.sh +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/copyDataToImpalaCluster.sh @@ -68,6 +68,16 @@ copydb $USAGE_STATS_DB copydb $PROD_USAGE_STATS_DB copydb $EXT_DB copydb $STATS_DB -copydb $MONITOR_DB +#copydb $MONITOR_DB copydb $OBSERVATORY_DB +copydb $MONITOR_DB'_funded' +copydb $MONITOR_DB'_institutions' +copydb $MONITOR_DB'_RIs_tail' + +contexts="knowmad::other dh-ch::other enermaps::other gotriple::other neanias-atmospheric::other rural-digital-europe::other covid-19::other aurora::other neanias-space::other north-america-studies::other north-american-studies::other eutopia::other" +for i in ${contexts} +do + tmp=`echo "$i" | sed 's/'-'/'_'/g' | sed 's/'::'/'_'/g'` + copydb ${MONITOR_DB}'_'${tmp} +done \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/finalizeImpalaCluster.sh b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/finalizeImpalaCluster.sh index 5914b95f8..a77b5a113 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/finalizeImpalaCluster.sh +++ 
b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/finalizeImpalaCluster.sh @@ -29,3 +29,14 @@ createShadowDB $STATS_DB $STATS_DB_SHADOW createShadowDB $MONITOR_DB $MONITOR_DB_SHADOW createShadowDB $OBSERVATORY_DB $OBSERVATORY_DB_SHADOW createShadowDB USAGE_STATS_DB USAGE_STATS_DB_SHADOW + +createShadowDB $MONITOR_DB'_funded' $MONITOR_DB'_funded_shadow' +createShadowDB $MONITOR_DB'_institutions' $MONITOR_DB'_institutions_shadow' +createShadowDB $MONITOR_DB'_RIs_tail' $MONITOR_DB'_RIs_tail_shadow' + +contexts="knowmad::other dh-ch::other enermaps::other gotriple::other neanias-atmospheric::other rural-digital-europe::other covid-19::other aurora::other neanias-space::other north-america-studies::other north-american-studies::other eutopia::other" +for i in ${contexts} +do + tmp=`echo "$i" | sed 's/'-'/'_'/g' | sed 's/'::'/'_'/g'` + createShadowDB ${MONITOR_DB}'_'${tmp} ${MONITOR_DB}'_'${tmp}'_shadow' +done \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/monitor.sh b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/monitor.sh index 440aac770..08f4c9232 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/monitor.sh +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/monitor.sh @@ -10,16 +10,88 @@ export SOURCE=$1 export TARGET=$2 export SHADOW=$3 export SCRIPT_PATH=$4 +export SCRIPT_PATH2=$5 +export SCRIPT_PATH3=$6 +export SCRIPT_PATH4=$7 +export SCRIPT_PATH5=$8 export HIVE_OPTS="-hiveconf mapred.job.queue.name=analytics -hiveconf hive.spark.client.connect.timeout=120000ms -hiveconf hive.spark.client.server.connect.timeout=300000ms -hiveconf spark.executor.memory=19166291558 -hiveconf spark.yarn.executor.memoryOverhead=3225 -hiveconf spark.driver.memory=11596411699 -hiveconf spark.yarn.driver.memoryOverhead=1228" 
export HADOOP_USER_NAME="oozie" -echo "Getting file from " $SCRIPT_PATH -hdfs dfs -copyToLocal $SCRIPT_PATH +echo "Getting file from " $4 +hdfs dfs -copyToLocal $4 + +echo "Getting file from " $5 +hdfs dfs -copyToLocal $5 + +echo "Getting file from " $6 +hdfs dfs -copyToLocal $6 + +echo "Getting file from " $7 +hdfs dfs -copyToLocal $7 + +echo "Getting file from " $8 +hdfs dfs -copyToLocal $8 echo "Creating monitor database" -#cat step20-createMonitorDB.sql | sed s/SOURCE/$1/g | sed s/TARGET/$2/g1 > foo -cat step20-createMonitorDB.sql | sed "s/TARGET/${TARGET}/g" | sed "s/SOURCE/${SOURCE}/g1" > foo +cat step20-createMonitorDB_funded.sql | sed "s/SOURCE/$1/g" | sed "s/TARGET/$2_funded/g1" > foo hive $HIVE_OPTS -f foo +cat step20-createMonitorDB.sql | sed "s/SOURCE/$1/g" | sed "s/TARGET/$2_funded/g1" > foo +hive $HIVE_OPTS -f foo +# +cat step20-createMonitorDB_institutions.sql | sed "s/SOURCE/$1/g" | sed "s/TARGET/$2_institutions/g1" > foo +hive $HIVE_OPTS -f foo +cat step20-createMonitorDB.sql | sed "s/SOURCE/$1/g" | sed "s/TARGET/$2_institutions/g1" > foo +hive $HIVE_OPTS -f foo + +contexts="knowmad::other dh-ch::other enermaps::other gotriple::other neanias-atmospheric::other rural-digital-europe::other covid-19::other aurora::other neanias-space::other north-america-studies::other north-american-studies::other eutopia::other" + +for i in ${contexts} +do + tmp=`echo "$i" | sed 's/'-'/'_'/g' | sed 's/'::'/'_'/g'` + tmp2=`echo "$i" |sed 's/:.*//' ` + cat step20-createMonitorDB_RIs.sql | sed "s/SOURCE/$1/g" | sed "s/TARGET/$2_$tmp/g1" | sed "s/CONTEXT/\'%$tmp2%\'/g" > foo + hive $HIVE_OPTS -f foo + cat step20-createMonitorDB.sql | sed "s/SOURCE/$1/g" | sed "s/TARGET/$2_$tmp/g1" > foo + hive $HIVE_OPTS -f foo +done + + +cat step20-createMonitorDB_RIs_tail.sql | sed "s/SOURCE/$1/g" | sed "s/TARGET/$2_RIs_tail/g1" | sed "s/CONTEXTS/'knowmad::other','dh-ch::other', 'enermaps::other', 'gotriple::other', 'neanias-atmospheric::other', 'rural-digital-europe::other',
'covid-19::other', 'aurora::other', 'neanias-space::other', 'north-america-studies::other', 'north-american-studies::other', 'eutopia::other'/g" > foo +hive $HIVE_OPTS -f foo +cat step20-createMonitorDB.sql | sed "s/SOURCE/$1/g" | sed "s/TARGET/$2_RIs_tail/g1" > foo +hive $HIVE_OPTS -f foo + echo "Hive shell finished" +echo "Updating shadow monitor funded database" +hive -e "drop database if exists ${SHADOW}_funded cascade" +hive -e "create database if not exists ${SHADOW}_funded" +hive $HIVE_OPTS --database ${2}_funded -e "show tables" | grep -v WARN | sed "s/\(.*\)/create view ${SHADOW}_funded.\1 as select * from ${2}_funded.\1;/" > foo +hive -f foo +echo "Updated shadow monitor funded database" + +echo "Updating shadow monitor insitutions database" +hive -e "drop database if exists ${SHADOW}_institutions cascade" +hive -e "create database if not exists ${SHADOW}_institutions" +hive $HIVE_OPTS --database ${2}_institutions -e "show tables" | grep -v WARN | sed "s/\(.*\)/create view ${SHADOW}_institutions.\1 as select * from ${2}_institutions.\1;/" > foo +hive -f foo +echo "Shadow db monitor insitutions ready!" + +echo "Updating shadow monitor RIs database" +for i in $contexts +do + tmp=`echo "$i" | sed 's/'-'/'_'/g' | sed 's/'::'/'_'/g'` + hive -e "drop database if exists ${SHADOW}_${tmp} cascade" + hive -e "create database if not exists ${SHADOW}_${tmp}" + hive $HIVE_OPTS --database ${2}_${tmp} -e "show tables" | grep -v WARN | sed "s/\(.*\)/create view ${SHADOW}_${tmp}.\1 as select * from ${2}_${tmp}.\1;/" > foo + hive -f foo +done +echo "Shadow db monitor RIs ready!" + +echo "Updating shadow monitor RIs tail database" +hive -e "drop database if exists ${SHADOW}_ris_tail cascade" +hive -e "create database if not exists ${SHADOW}_ris_tail" +hive $HIVE_OPTS --database ${2}_ris_tail -e "show tables" | grep -v WARN | sed "s/\(.*\)/create view ${SHADOW}_ris_tail.\1 as select * from ${2}_ris_tail.\1;/" > foo +hive -f foo +echo "Shadow db monitor RIs tail ready!"
diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql index ef573916f..e358e0ef0 100755 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql @@ -779,4 +779,18 @@ cast(measures_ids.unit.value[0] as decimal(6,3)) score_dec, measures_ids.unit.va from result lateral view explode(measures) measures as measures_ids where measures_ids.id!='views' and measures_ids.id!='downloads'; -ANALYZE TABLE indi_impact_measures COMPUTE STATISTICS; \ No newline at end of file +ANALYZE TABLE indi_impact_measures COMPUTE STATISTICS; + +CREATE TEMPORARY TABLE pub_fos_totals as +select rf.id, count(distinct lvl3) totals from result_fos rf +group by rf.id; + +create table if not exists indi_pub_interdisciplinarity as +select distinct p.id, coalesce(indi_pub_is_interdisciplinary, 0) +as indi_pub_is_interdisciplinary +from pub_fos_totals p +left outer join ( +select pub_fos_totals.id, 1 as indi_pub_is_interdisciplinary from pub_fos_totals +where totals>10) tmp on p.id=tmp.id; + +ANALYZE TABLE indi_pub_interdisciplinarity COMPUTE STATISTICS; \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql index 86b5c7ca1..9744d5aae 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql +++ 
b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql @@ -1,5 +1,78 @@ -drop database if exists TARGET cascade; -create database if not exists TARGET; +--drop database if exists TARGET cascade; +--create database if not exists TARGET; +-- +--create view if not exists TARGET.category as select * from SOURCE.category; +--create view if not exists TARGET.concept as select * from SOURCE.concept; +--create view if not exists TARGET.context as select * from SOURCE.context; +--create view if not exists TARGET.country as select * from SOURCE.country; +--create view if not exists TARGET.countrygdp as select * from SOURCE.countrygdp; +--create view if not exists TARGET.creation_date as select * from SOURCE.creation_date; +--create view if not exists TARGET.funder as select * from SOURCE.funder; +--create view if not exists TARGET.fundref as select * from SOURCE.fundref; +--create view if not exists TARGET.rndexpenditure as select * from SOURCE.rndexpediture; +--create view if not exists TARGET.rndgdpexpenditure as select * from SOURCE.rndgdpexpenditure; +--create view if not exists TARGET.doctoratestudents as select * from SOURCE.doctoratestudents; +--create view if not exists TARGET.totalresearchers as select * from SOURCE.totalresearchers; +--create view if not exists TARGET.totalresearchersft as select * from SOURCE.totalresearchersft; +--create view if not exists TARGET.hrrst as select * from SOURCE.hrrst; +-- +--create table TARGET.result stored as parquet as +-- select distinct * from ( +-- select * from SOURCE.result r where exists (select 1 from SOURCE.result_projects rp join SOURCE.project p on rp.project=p.id where rp.id=r.id) +-- union all +-- select * from SOURCE.result r where exists (select 1 from SOURCE.result_concepts rc where rc.id=r.id) +-- union all +-- select * from SOURCE.result r where exists (select 1 from SOURCE.result_organization ro where ro.id=r.id and ro.organization in ( +-- 
'openorgs____::b84450f9864182c67b8611b5593f4250', --"Athena Research and Innovation Center In Information Communication & Knowledge Technologies', --ARC" +-- 'openorgs____::d41cf6bd4ab1b1362a44397e0b95c975', --National Research Council +-- 'openorgs____::d2a09b9d5eabb10c95f9470e172d05d2', --??? Not exists ?? +-- 'openorgs____::d169c7407dd417152596908d48c11460', --Masaryk University +-- 'openorgs____::1ec924b1759bb16d0a02f2dad8689b21', --University of Belgrade +-- 'openorgs____::0ae431b820e4c33db8967fbb2b919150', --University of Helsinki +-- 'openorgs____::759d59f05d77188faee99b7493b46805', --University of Minho +-- 'openorgs____::cad284878801b9465fa51a95b1d779db', --Universidad Politécnica de Madrid +-- 'openorgs____::eadc8da90a546e98c03f896661a2e4d4', --University of Göttingen +-- 'openorgs____::c0286313e36479eff8676dba9b724b40', --National and Kapodistrian University of Athens +-- -- 'openorgs____::c80a8243a5e5c620d7931c88d93bf17a', --Université Paris Diderot +-- 'openorgs____::c08634f0a6b0081c3dc6e6c93a4314f3', --Bielefeld University +-- 'openorgs____::6fc85e4a8f7ecaf4b0c738d010e967ea', --University of Southern Denmark +-- 'openorgs____::3d6122f87f9a97a99d8f6e3d73313720', --Humboldt-Universität zu Berlin +-- 'openorgs____::16720ada63d0fa8ca41601feae7d1aa5', --TU Darmstadt +-- 'openorgs____::ccc0a066b56d2cfaf90c2ae369df16f5', --KU Leuven +-- 'openorgs____::4c6f119632adf789746f0a057ed73e90', --University of the Western Cape +-- 'openorgs____::ec3665affa01aeafa28b7852c4176dbd', --Rudjer Boskovic Institute +-- 'openorgs____::5f31346d444a7f06a28c880fb170b0f6', --Ghent University +-- 'openorgs____::2dbe47117fd5409f9c61620813456632', --University of Luxembourg +-- 'openorgs____::6445d7758d3a40c4d997953b6632a368', --National Institute of Informatics (NII) +-- 'openorgs____::b77c01aa15de3675da34277d48de2ec1', -- Valencia Catholic University Saint Vincent Martyr +-- 'openorgs____::7fe2f66cdc43983c6b24816bfe9cf6a0', -- Unviersity of Warsaw +-- 
'openorgs____::15e7921fc50d9aa1229a82a84429419e', -- University Of Thessaly +-- 'openorgs____::11f7919dadc8f8a7251af54bba60c956', -- Technical University of Crete +-- 'openorgs____::84f0c5f5dbb6daf42748485924efde4b', -- University of Piraeus +-- 'openorgs____::4ac562f0376fce3539504567649cb373', -- University of Patras +-- 'openorgs____::3e8d1f8c3f6cd7f418b09f1f58b4873b', -- Aristotle University of Thessaloniki +-- 'openorgs____::3fcef6e1c469c10f2a84b281372c9814', -- World Bank +-- 'openorgs____::1698a2eb1885ef8adb5a4a969e745ad3', -- École des Ponts ParisTech +-- 'openorgs____::e15adb13c4dadd49de4d35c39b5da93a', -- Nanyang Technological University +-- 'openorgs____::4b34103bde246228fcd837f5f1bf4212', -- Autonomous University of Barcelona +-- 'openorgs____::72ec75fcfc4e0df1a76dc4c49007fceb', -- McMaster University +-- 'openorgs____::51c7fc556e46381734a25a6fbc3fd398', -- University of Modena and Reggio Emilia +-- 'openorgs____::235d7f9ad18ecd7e6dc62ea4990cb9db', -- Bilkent University +-- 'openorgs____::31f2fa9e05b49d4cf40a19c3fed8eb06', -- Saints Cyril and Methodius University of Skopje +-- 'openorgs____::db7686f30f22cbe73a4fde872ce812a6', -- University of Milan +-- 'openorgs____::b8b8ca674452579f3f593d9f5e557483', -- University College Cork +-- 'openorgs____::38d7097854736583dde879d12dacafca' -- Brown University +-- 'openorgs____::57784c9e047e826fefdb1ef816120d92', --Arts et Métiers ParisTech +-- 'openorgs____::2530baca8a15936ba2e3297f2bce2e7e', -- University of Cape Town +-- 'openorgs____::d11f981828c485cd23d93f7f24f24db1', -- Technological University Dublin +-- 'openorgs____::5e6bf8962665cdd040341171e5c631d8', -- Delft University of Technology +-- 'openorgs____::846cb428d3f52a445f7275561a7beb5d', -- University of Manitoba +-- 'openorgs____::eb391317ed0dc684aa81ac16265de041', -- Universitat Rovira i Virgili +-- 'openorgs____::66aa9fc2fceb271423dfabcc38752dc0', -- Lund University +-- 'openorgs____::3cff625a4370d51e08624cc586138b2f' -- IMT Atlantique +-- ) )) foo; +-- 
+--ANALYZE TABLE TARGET.result COMPUTE STATISTICS; create view if not exists TARGET.category as select * from SOURCE.category; create view if not exists TARGET.concept as select * from SOURCE.concept; @@ -16,64 +89,6 @@ create view if not exists TARGET.totalresearchers as select * from SOURCE.totalr create view if not exists TARGET.totalresearchersft as select * from SOURCE.totalresearchersft; create view if not exists TARGET.hrrst as select * from SOURCE.hrrst; -create table TARGET.result stored as parquet as - select distinct * from ( - select * from SOURCE.result r where exists (select 1 from SOURCE.result_projects rp join SOURCE.project p on rp.project=p.id where rp.id=r.id) - union all - select * from SOURCE.result r where exists (select 1 from SOURCE.result_concepts rc where rc.id=r.id) - union all - select * from SOURCE.result r where exists (select 1 from SOURCE.result_organization ro where ro.id=r.id and ro.organization in ( - 'openorgs____::b84450f9864182c67b8611b5593f4250', --"Athena Research and Innovation Center In Information Communication & Knowledge Technologies', --ARC" - 'openorgs____::d41cf6bd4ab1b1362a44397e0b95c975', --National Research Council - 'openorgs____::d2a09b9d5eabb10c95f9470e172d05d2', --??? Not exists ?? 
- 'openorgs____::d169c7407dd417152596908d48c11460', --Masaryk University - 'openorgs____::1ec924b1759bb16d0a02f2dad8689b21', --University of Belgrade - 'openorgs____::0ae431b820e4c33db8967fbb2b919150', --University of Helsinki - 'openorgs____::759d59f05d77188faee99b7493b46805', --University of Minho - 'openorgs____::cad284878801b9465fa51a95b1d779db', --Universidad Politécnica de Madrid - 'openorgs____::eadc8da90a546e98c03f896661a2e4d4', --University of Göttingen - 'openorgs____::c0286313e36479eff8676dba9b724b40', --National and Kapodistrian University of Athens - -- 'openorgs____::c80a8243a5e5c620d7931c88d93bf17a', --Université Paris Diderot - 'openorgs____::c08634f0a6b0081c3dc6e6c93a4314f3', --Bielefeld University - 'openorgs____::6fc85e4a8f7ecaf4b0c738d010e967ea', --University of Southern Denmark - 'openorgs____::3d6122f87f9a97a99d8f6e3d73313720', --Humboldt-Universität zu Berlin - 'openorgs____::16720ada63d0fa8ca41601feae7d1aa5', --TU Darmstadt - 'openorgs____::ccc0a066b56d2cfaf90c2ae369df16f5', --KU Leuven - 'openorgs____::4c6f119632adf789746f0a057ed73e90', --University of the Western Cape - 'openorgs____::ec3665affa01aeafa28b7852c4176dbd', --Rudjer Boskovic Institute - 'openorgs____::5f31346d444a7f06a28c880fb170b0f6', --Ghent University - 'openorgs____::2dbe47117fd5409f9c61620813456632', --University of Luxembourg - 'openorgs____::6445d7758d3a40c4d997953b6632a368', --National Institute of Informatics (NII) - 'openorgs____::b77c01aa15de3675da34277d48de2ec1', -- Valencia Catholic University Saint Vincent Martyr - 'openorgs____::7fe2f66cdc43983c6b24816bfe9cf6a0', -- Unviersity of Warsaw - 'openorgs____::15e7921fc50d9aa1229a82a84429419e', -- University Of Thessaly - 'openorgs____::11f7919dadc8f8a7251af54bba60c956', -- Technical University of Crete - 'openorgs____::84f0c5f5dbb6daf42748485924efde4b', -- University of Piraeus - 'openorgs____::4ac562f0376fce3539504567649cb373', -- University of Patras - 'openorgs____::3e8d1f8c3f6cd7f418b09f1f58b4873b', -- Aristotle 
University of Thessaloniki - 'openorgs____::3fcef6e1c469c10f2a84b281372c9814', -- World Bank - 'openorgs____::1698a2eb1885ef8adb5a4a969e745ad3', -- École des Ponts ParisTech - 'openorgs____::e15adb13c4dadd49de4d35c39b5da93a', -- Nanyang Technological University - 'openorgs____::4b34103bde246228fcd837f5f1bf4212', -- Autonomous University of Barcelona - 'openorgs____::72ec75fcfc4e0df1a76dc4c49007fceb', -- McMaster University - 'openorgs____::51c7fc556e46381734a25a6fbc3fd398', -- University of Modena and Reggio Emilia - 'openorgs____::235d7f9ad18ecd7e6dc62ea4990cb9db', -- Bilkent University - 'openorgs____::31f2fa9e05b49d4cf40a19c3fed8eb06', -- Saints Cyril and Methodius University of Skopje - 'openorgs____::db7686f30f22cbe73a4fde872ce812a6', -- University of Milan - 'openorgs____::b8b8ca674452579f3f593d9f5e557483', -- University College Cork - 'openorgs____::38d7097854736583dde879d12dacafca' -- Brown University - 'openorgs____::57784c9e047e826fefdb1ef816120d92', --Arts et Métiers ParisTech - 'openorgs____::2530baca8a15936ba2e3297f2bce2e7e', -- University of Cape Town - 'openorgs____::d11f981828c485cd23d93f7f24f24db1', -- Technological University Dublin - 'openorgs____::5e6bf8962665cdd040341171e5c631d8', -- Delft University of Technology - 'openorgs____::846cb428d3f52a445f7275561a7beb5d', -- University of Manitoba - 'openorgs____::eb391317ed0dc684aa81ac16265de041', -- Universitat Rovira i Virgili - 'openorgs____::66aa9fc2fceb271423dfabcc38752dc0', -- Lund University - 'openorgs____::3cff625a4370d51e08624cc586138b2f' -- IMT Atlantique - ) )) foo; - -ANALYZE TABLE TARGET.result COMPUTE STATISTICS; - create table TARGET.result_citations stored as parquet as select * from SOURCE.result_citations orig where exists (select 1 from TARGET.result r where r.id=orig.id); ANALYZE TABLE TARGET.result_citations COMPUTE STATISTICS; @@ -251,3 +266,5 @@ create table TARGET.indi_result_with_pid stored as parquet as select * from SOUR ANALYZE TABLE TARGET.indi_result_with_pid COMPUTE 
STATISTICS; create table TARGET.indi_impact_measures stored as parquet as select * from SOURCE.indi_impact_measures orig where exists (select 1 from TARGET.result r where r.id=orig.id); ANALYZE TABLE TARGET.indi_impact_measures COMPUTE STATISTICS; +create table TARGET.indi_pub_interdisciplinarity stored as parquet as select * from SOURCE.indi_pub_interdisciplinarity orig where exists (select 1 from TARGET.result r where r.id=orig.id); +ANALYZE TABLE TARGET.indi_pub_interdisciplinarity COMPUTE STATISTICS; diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB_RIs.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB_RIs.sql new file mode 100644 index 000000000..92b40405d --- /dev/null +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB_RIs.sql @@ -0,0 +1,15 @@ +drop database if exists TARGET cascade; +create database if not exists TARGET; + +create table TARGET.result stored as parquet as + select distinct * from ( + select * from SOURCE.result r where exists + (select 1 + from SOURCE.result_concepts rc + join SOURCE.concept conc on conc.id=rc.concept + join SOURCE.category cat on cat.id=conc.category + join SOURCE.context cont on cont.id=cat.context +-- join SOURCE.result + where rc.id=r.id and conc.category like CONTEXT) +) foo; +ANALYZE TABLE TARGET.result COMPUTE STATISTICS; \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB_RIs_tail.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB_RIs_tail.sql new file mode 100644 index 000000000..ef6d08d79 --- /dev/null +++ 
b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB_RIs_tail.sql @@ -0,0 +1,15 @@ +drop database if exists TARGET cascade; +create database if not exists TARGET; + +create table TARGET.result stored as parquet as + select distinct * from ( + select * from SOURCE.result r where exists + (select 1 + from SOURCE.result_concepts rc + join SOURCE.concept conc on conc.id=rc.concept + join SOURCE.category cat on cat.id=conc.category + join SOURCE.context cont on cont.id=cat.context +-- join SOURCE.result + where rc.id=r.id and conc.category not in (CONTEXTS)) +) foo; +ANALYZE TABLE TARGET.result COMPUTE STATISTICS; \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB_funded.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB_funded.sql new file mode 100644 index 000000000..8d8739c74 --- /dev/null +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB_funded.sql @@ -0,0 +1,9 @@ +drop database if exists TARGET cascade; +create database if not exists TARGET; + +create table TARGET.result stored as parquet as + select distinct * from ( + select * from SOURCE.result r where exists (select 1 from SOURCE.result_projects rp join SOURCE.project p on rp.project=p.id where rp.id=r.id) + ) foo; + +ANALYZE TABLE TARGET.result COMPUTE STATISTICS; \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB_institutions.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB_institutions.sql new file mode 100644 index 000000000..121ee6e7f --- /dev/null +++ 
b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB_institutions.sql @@ -0,0 +1,56 @@ +drop database if exists TARGET cascade; +create database if not exists TARGET; + +create table TARGET.result stored as parquet as + select distinct * from ( + select * from SOURCE.result r where exists (select 1 from SOURCE.result_organization ro where ro.id=r.id and ro.organization in ( + 'openorgs____::b84450f9864182c67b8611b5593f4250', --"Athena Research and Innovation Center In Information Communication & Knowledge Technologies', --ARC" + 'openorgs____::d41cf6bd4ab1b1362a44397e0b95c975', --National Research Council + 'openorgs____::d2a09b9d5eabb10c95f9470e172d05d2', --??? Not exists ?? + 'openorgs____::d169c7407dd417152596908d48c11460', --Masaryk University + 'openorgs____::1ec924b1759bb16d0a02f2dad8689b21', --University of Belgrade + 'openorgs____::0ae431b820e4c33db8967fbb2b919150', --University of Helsinki + 'openorgs____::759d59f05d77188faee99b7493b46805', --University of Minho + 'openorgs____::cad284878801b9465fa51a95b1d779db', --Universidad Politécnica de Madrid + 'openorgs____::eadc8da90a546e98c03f896661a2e4d4', --University of Göttingen + 'openorgs____::c0286313e36479eff8676dba9b724b40', --National and Kapodistrian University of Athens + -- 'openorgs____::c80a8243a5e5c620d7931c88d93bf17a', --Université Paris Diderot + 'openorgs____::c08634f0a6b0081c3dc6e6c93a4314f3', --Bielefeld University + 'openorgs____::6fc85e4a8f7ecaf4b0c738d010e967ea', --University of Southern Denmark + 'openorgs____::3d6122f87f9a97a99d8f6e3d73313720', --Humboldt-Universität zu Berlin + 'openorgs____::16720ada63d0fa8ca41601feae7d1aa5', --TU Darmstadt + 'openorgs____::ccc0a066b56d2cfaf90c2ae369df16f5', --KU Leuven + 'openorgs____::4c6f119632adf789746f0a057ed73e90', --University of the Western Cape + 'openorgs____::ec3665affa01aeafa28b7852c4176dbd', --Rudjer Boskovic Institute + 'openorgs____::5f31346d444a7f06a28c880fb170b0f6', 
--Ghent University + 'openorgs____::2dbe47117fd5409f9c61620813456632', --University of Luxembourg + 'openorgs____::6445d7758d3a40c4d997953b6632a368', --National Institute of Informatics (NII) + 'openorgs____::b77c01aa15de3675da34277d48de2ec1', -- Valencia Catholic University Saint Vincent Martyr + 'openorgs____::7fe2f66cdc43983c6b24816bfe9cf6a0', -- University of Warsaw + 'openorgs____::15e7921fc50d9aa1229a82a84429419e', -- University Of Thessaly + 'openorgs____::11f7919dadc8f8a7251af54bba60c956', -- Technical University of Crete + 'openorgs____::84f0c5f5dbb6daf42748485924efde4b', -- University of Piraeus + 'openorgs____::4ac562f0376fce3539504567649cb373', -- University of Patras + 'openorgs____::3e8d1f8c3f6cd7f418b09f1f58b4873b', -- Aristotle University of Thessaloniki + 'openorgs____::3fcef6e1c469c10f2a84b281372c9814', -- World Bank + 'openorgs____::1698a2eb1885ef8adb5a4a969e745ad3', -- École des Ponts ParisTech + 'openorgs____::e15adb13c4dadd49de4d35c39b5da93a', -- Nanyang Technological University + 'openorgs____::4b34103bde246228fcd837f5f1bf4212', -- Autonomous University of Barcelona + 'openorgs____::72ec75fcfc4e0df1a76dc4c49007fceb', -- McMaster University + 'openorgs____::51c7fc556e46381734a25a6fbc3fd398', -- University of Modena and Reggio Emilia + 'openorgs____::235d7f9ad18ecd7e6dc62ea4990cb9db', -- Bilkent University + 'openorgs____::31f2fa9e05b49d4cf40a19c3fed8eb06', -- Saints Cyril and Methodius University of Skopje + 'openorgs____::db7686f30f22cbe73a4fde872ce812a6', -- University of Milan + 'openorgs____::b8b8ca674452579f3f593d9f5e557483', -- University College Cork + 'openorgs____::38d7097854736583dde879d12dacafca', -- Brown University + 'openorgs____::57784c9e047e826fefdb1ef816120d92', --Arts et Métiers ParisTech + 'openorgs____::2530baca8a15936ba2e3297f2bce2e7e', -- University of Cape Town + 'openorgs____::d11f981828c485cd23d93f7f24f24db1', -- Technological University Dublin + 'openorgs____::5e6bf8962665cdd040341171e5c631d8', -- Delft University of
Technology + 'openorgs____::846cb428d3f52a445f7275561a7beb5d', -- University of Manitoba + 'openorgs____::eb391317ed0dc684aa81ac16265de041', -- Universitat Rovira i Virgili + 'openorgs____::66aa9fc2fceb271423dfabcc38752dc0', -- Lund University + 'openorgs____::3cff625a4370d51e08624cc586138b2f' -- IMT Atlantique + ))) foo; + +ANALYZE TABLE TARGET.result COMPUTE STATISTICS; \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml index 68ef4595e..2ab50fb29 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml @@ -374,25 +374,29 @@ ${monitor_db_name} ${monitor_db_shadow_name} ${wf:appPath()}/scripts/step20-createMonitorDB.sql + ${wf:appPath()}/scripts/step20-createMonitorDB_funded.sql + ${wf:appPath()}/scripts/step20-createMonitorDB_institutions.sql + ${wf:appPath()}/scripts/step20-createMonitorDB_RIs.sql + ${wf:appPath()}/scripts/step20-createMonitorDB_RIs_tail.sql monitor.sh - - - - - - - ${jobTracker} - ${nameNode} - monitor-post.sh - ${monitor_db_name} - ${monitor_db_shadow_name} - monitor-post.sh - + + + + + + + + + + + + + ${jobTracker} From ad07fbf0538aedd4dbb54673f45cac56586a36d8 Mon Sep 17 00:00:00 2001 From: dimitrispie Date: Fri, 2 Jun 2023 14:13:10 +0300 Subject: [PATCH 4/4] Add names to organizations for collaboration indicators --- .../scripts/step16-createIndicatorsTables.sql | 26 ++++++++++--------- 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql 
b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql index e358e0ef0..f523b63d5 100755 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql @@ -92,28 +92,28 @@ ANALYZE TABLE indi_funded_result_with_fundref COMPUTE STATISTICS; -- -- compute stats indi_result_org_collab; -- -create TEMPORARY TABLE tmp AS SELECT ro.organization organization, ro.id from result_organization ro +create TEMPORARY TABLE tmp AS SELECT ro.organization organization, ro.id, o.name from result_organization ro join organization o on o.id=ro.organization where o.name is not null; create table if not exists indi_result_org_collab stored as parquet as -select o1.organization org1, o2.organization org2, count(o1.id) as collaborations +select o1.organization org1, o1.name org1name1, o2.organization org2, o2.name org2name2, count(o1.id) as collaborations from tmp as o1 -join tmp as o2 where o1.id=o2.id and o1.organization!=o2.organization -group by o1.organization, o2.organization; +join tmp as o2 where o1.id=o2.id and o1.organization!=o2.organization and o1.name!=o2.name +group by o1.organization, o2.organization, o1.name, o2.name; drop table tmp purge; ANALYZE TABLE indi_result_org_collab COMPUTE STATISTICS; create TEMPORARY TABLE tmp AS -select distinct ro.organization organization, ro.id, o.country from result_organization ro +select distinct ro.organization organization, ro.id, o.name, o.country from result_organization ro join organization o on o.id=ro.organization where country <> 'UNKNOWN' and o.name is not null; create table if not exists indi_result_org_country_collab stored as parquet as -select o1.organization org1,o2.country country2, count(o1.id) as collaborations +select o1.organization 
org1,o1.name org1name1, o2.country country2, count(o1.id) as collaborations from tmp as o1 join tmp as o2 on o1.id=o2.id where o1.id=o2.id and o1.country!=o2.country -group by o1.organization, o1.id, o2.country; +group by o1.organization, o1.id, o1.name, o2.country; drop table tmp purge; @@ -121,7 +121,7 @@ ANALYZE TABLE indi_result_org_country_collab COMPUTE STATISTICS; create TEMPORARY TABLE AS select o.id organization, o.name, ro.project as project from organization o - join organization_projects ro on o.id=ro.id; + join organization_projects ro on o.id=ro.id where o.name is not null; create table if not exists indi_project_collab_org stored as parquet as select o1.organization org1,o1.name orgname1, o2.organization org2, o2.name orgname2, count(distinct o1.project) as collaborations @@ -133,16 +133,16 @@ group by o1.name,o2.name, o1.organization, o2.organization; ANALYZE TABLE indi_project_collab_org COMPUTE STATISTICS; create TEMPORARY TABLE tmp AS -select o.id organization, o.country , ro.project as project from organization o +select o.id organization, o.name, o.country , ro.project as project from organization o join organization_projects ro on o.id=ro.id - and o.country <> 'UNKNOWN'; + and o.country <> 'UNKNOWN' and o.name is not null; create table if not exists indi_project_collab_org_country stored as parquet as -select o1.organization org1,o2.country country2, count(distinct o1.project) as collaborations +select o1.organization org1,o1.name org1name, o2.country country2, count(distinct o1.project) as collaborations from tmp as o1 join tmp as o2 on o1.project=o2.project where o1.organization<>o2.organization and o1.country<>o2.country -group by o1.organization, o2.country; +group by o1.organization, o2.country, o1.name; drop table tmp purge; @@ -793,4 +793,6 @@ left outer join ( select pub_fos_totals.id, 1 as indi_pub_is_interdisciplinary from pub_fos_totals where totals>10) tmp on p.id=tmp.id; +drop table pub_fos_totals purge; + ANALYZE TABLE 
indi_pub_interdisciplinarity COMPUTE STATISTICS; \ No newline at end of file