From 96bddcc921fde7fecc85046c535a05abf3efde4e Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 17 Apr 2024 15:06:50 +0200 Subject: [PATCH 1/4] revised query implementation for indi_pub_gold_oa --- .../oozie_app/scripts/step16-createIndicatorsTables.sql | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql index 70cde6481..f23050031 100755 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql @@ -237,16 +237,16 @@ create table if not exists ${stats_db_name}.indi_pub_gold_oa stored as parquet a UNION ALL select id, issn_online as issn from ${stats_db_name}.datasource d where d.id like '%doajarticles%' UNION ALL - select id, issn_printed as issn from ${stats_db_name}.datasource d join gold_oa on gold_oa.issn=d.issn_printed + select id, issn_printed as issn from ${stats_db_name}.datasource d left semi join gold_oa on gold_oa.issn=d.issn_printed UNION ALL - select id, issn_online as issn from ${stats_db_name}.datasource d join gold_oa on gold_oa.issn=d.issn_online) foo + select id, issn_online as issn from ${stats_db_name}.datasource d left semi join gold_oa on gold_oa.issn=d.issn_online) foo ) SELECT DISTINCT pd.id, coalesce(is_gold, 0) as is_gold FROM ${stats_db_name}.publication_datasources pd left outer join ( select pd.id, 1 as is_gold FROM ${stats_db_name}.publication_datasources pd - join dd on dd.id=pd.datasource + left semi join dd on dd.id=pd.datasource left outer join ${stats_db_name}.result_accessroute ra on ra.id = pd.id where ra.accessroute = 'gold') 
tmp on tmp.id=pd.id; /*EOS*/ drop table if exists ${stats_db_name}.indi_pub_hybrid_oa_with_cc purge; /*EOS*/ From 62a07b7add94c6057bd3e2102185dd629749cd98 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 17 Apr 2024 15:13:28 +0200 Subject: [PATCH 2/4] added missing end of statement /*EOS*/ --- .../oozie_app/scripts/step16-createIndicatorsTables.sql | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql index f23050031..469743269 100755 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql @@ -335,8 +335,8 @@ select ar.organization, rf.no_result_fair/ar.no_allresults org_fairness from allresults ar join result_fair rf on rf.organization=ar.organization; /*EOS*/ -DROP VIEW result_fair; -DROP VIEW allresults; +DROP VIEW result_fair; /*EOS*/ +DROP VIEW allresults; /*EOS*/ CREATE TEMPORARY VIEW result_fair as select year, ro.organization organization, count(distinct ro.id) no_result_fair from ${stats_db_name}.result_organization ro @@ -1006,7 +1006,7 @@ left outer join ${stats_db_name}.organization o on o.id=ro.organization left outer join ${stats_db_name}.result_projects rp on rp.id=ro.id left outer join ${stats_db_name}.project p on p.id=rp.project left outer join ${stats_db_name}.funder f on f.name=p.funder -where coalesce(o.country, f.country) IS NOT NULL; +where coalesce(o.country, f.country) IS NOT NULL; /*EOS*/ drop table if exists ${stats_db_name}.indi_result_oa_with_license purge; /*EOS*/ create table ${stats_db_name}.indi_result_oa_with_license stored as 
parquet as From 43e123c624df8894fbbdcec1c3defcfd8e56895c Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 17 Apr 2024 16:40:29 +0200 Subject: [PATCH 3/4] added column alias --- .../stats/oozie_app/scripts/step16-createIndicatorsTables.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql index 469743269..93f8e049d 100755 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql @@ -1000,7 +1000,7 @@ left outer join ( drop table if exists ${stats_db_name}.result_country purge; /*EOS*/ create table ${stats_db_name}.result_country stored as parquet as -select distinct ro.id, coalesce(o.country, f.country) +select distinct ro.id, coalesce(o.country, f.country) as country from ${stats_db_name}.result_organization ro left outer join ${stats_db_name}.organization o on o.id=ro.organization left outer join ${stats_db_name}.result_projects rp on rp.id=ro.id From 18fdaaf548f71fa93bd1c4b09a089017d04d79ea Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 18 Apr 2024 11:23:43 +0200 Subject: [PATCH 4/4] integrating suggestion from #9699 to improve the result_country table construction --- .../scripts/step16-createIndicatorsTables.sql | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql 
b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql index 93f8e049d..ca5efccce 100755 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql @@ -1000,13 +1000,18 @@ left outer join ( drop table if exists ${stats_db_name}.result_country purge; /*EOS*/ create table ${stats_db_name}.result_country stored as parquet as -select distinct ro.id, coalesce(o.country, f.country) as country -from ${stats_db_name}.result_organization ro -left outer join ${stats_db_name}.organization o on o.id=ro.organization -left outer join ${stats_db_name}.result_projects rp on rp.id=ro.id -left outer join ${stats_db_name}.project p on p.id=rp.project -left outer join ${stats_db_name}.funder f on f.name=p.funder -where coalesce(o.country, f.country) IS NOT NULL; /*EOS*/ +select distinct * +from ( + select ro.id, o.country + from ${stats_db_name}.result_organization ro + left outer join ${stats_db_name}.organization o on o.id=ro.organization + union all + select rp.id, f.country + from ${stats_db_name}.result_projects rp + left outer join ${stats_db_name}.project p on p.id=rp.project + left outer join ${stats_db_name}.funder f on f.name=p.funder + ) rc +where rc.country is not null; /*EOS*/ drop table if exists ${stats_db_name}.indi_result_oa_with_license purge; /*EOS*/ create table ${stats_db_name}.indi_result_oa_with_license stored as parquet as