From 2b5f8c9c9a3611c57ee5febfe262a455a39ad801 Mon Sep 17 00:00:00 2001 From: dimitrispie Date: Tue, 6 Sep 2022 12:27:53 +0300 Subject: [PATCH 1/3] comment out duplicate table creation --- .../scripts/step16-createIndicatorsTables.sql | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql index 2581f1d64..b13eae0aa 100755 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql @@ -179,17 +179,17 @@ from publication_datasources pd compute stats indi_pub_diamond; -create table indi_pub_hybrid stored as parquet as -select distinct pd.id, coalesce(is_hybrid, 0) as is_hybrid -from publication_datasources pd - left outer join ( - select pd.id, 1 as is_hybrid from publication_datasources pd - join datasource d on d.id=pd.datasource - join stats_ext.plan_s_jn ps where (ps.issn_print=d.issn_printed and ps.issn_online=d.issn_online) - and (ps.journal_is_in_doaj=false and ps.journal_is_oa=false)) tmp - on pd.id=tmp.id; - -compute stats indi_pub_hybrid; +--create table indi_pub_hybrid stored as parquet as +--select distinct pd.id, coalesce(is_hybrid, 0) as is_hybrid +--from publication_datasources pd +-- left outer join ( +-- select pd.id, 1 as is_hybrid from publication_datasources pd +-- join datasource d on d.id=pd.datasource +-- join stats_ext.plan_s_jn ps where (ps.issn_print=d.issn_printed and ps.issn_online=d.issn_online) +-- and (ps.journal_is_in_doaj=false and ps.journal_is_oa=false)) tmp +-- on pd.id=tmp.id; +-- +--compute stats indi_pub_hybrid; create table indi_pub_in_transformative stored as parquet as select distinct pd.id, coalesce(is_transformative, 0) as is_transformative From 71b069ca90a2f7ec09d64241c60917d3636fc81e Mon Sep 17 00:00:00 2001 From: dimitrispie Date: Fri, 9 Sep 2022 13:15:58 +0300 Subject: [PATCH 2/3] Changes to indicator and monitor scripts --- .../scripts/step16-createIndicatorsTables.sql | 16 ++++++++-------- .../oozie_app/scripts/step20-createMonitorDB.sql | 5 ++++- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql index b13eae0aa..0f294145c 100755 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql @@ -564,12 +564,12 @@ create table indi_org_fairness stored as parquet as (select ro.organization organization, count(distinct ro.id) no_result_fair from result_organization ro join result r on r.id=ro.id --join result_pids rp on r.id=rp.id - where (title is not null) and (publisher is not null) and (abstract is true) and (year is not null) and (authors>0) and year>2003 + where (title is not null) and (publisher is not null) and (abstract is true) and (year is not null) and (authors>0) and cast(year as int)>2003 group by ro.organization), --return all results group by organization allresults as (select organization, count(distinct ro.id) no_allresults from result_organization ro join result r on r.id=ro.id - where year>2003 + where cast(year as int)>2003 group by organization) --return results_fair/all_results select allresults.organization, result_fair.no_result_fair/allresults.no_allresults org_fairness @@ -638,11 +638,11 @@ create table indi_org_fairness_year stored as parquet as (select year, ro.organization organization, count(distinct ro.id) no_result_fair from result_organization ro join result r on r.id=ro.id join result_pids rp on r.id=rp.id - where (title is not null) and (publisher is not null) and (abstract is true) and (year is not null) and (authors>0) and year>2003 + where (title is not null) and (publisher is not null) and (abstract is true) and (year is not null) and (authors>0) and cast(year as int)>2003 group by ro.organization, year), allresults as (select year, organization, count(distinct ro.id) no_allresults from result_organization ro join result r on r.id=ro.id - where year>2003 + where cast(year as int)>2003 group by organization, year) --return results_fair/all_results select allresults.year, allresults.organization, result_fair.no_result_fair/allresults.no_allresults org_fairness @@ -657,12 +657,12 @@ create table indi_org_findable_year stored as parquet as (select year, ro.organization organization, count(distinct rp.id) no_result_with_pid from result_organization ro join result_pids rp on rp.id=ro.id join result r on r.id=rp.id - where year >2003 + where cast(year as int) >2003 group by ro.organization, year), --return all results group by organization,year allresults as (select year, organization, count(distinct ro.id) no_allresults from result_organization ro join result r on r.id=ro.id - where year >2003 + where cast(year as int) >2003 group by organization, year) --return results_with_pid/all_results select allresults.year, allresults.organization, result_with_pid.no_result_with_pid/allresults.no_allresults org_findable @@ -677,12 +677,12 @@ create table indi_org_findable stored as parquet as (select ro.organization organization, count(distinct rp.id) no_result_with_pid from result_organization ro join result_pids rp on rp.id=ro.id join result r on r.id=rp.id - where year >2003 + where cast(year as int) >2003 group by ro.organization), --return all results group by organization allresults as (select organization, count(distinct ro.id) no_allresults from result_organization ro join result r on r.id=ro.id - where year >2003 + where cast(year as int) >2003 group by organization) --return results_with_pid/all_results select allresults.organization, result_with_pid.no_result_with_pid/allresults.no_allresults org_findable diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql index d78895198..572b70ef7 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql @@ -45,7 +45,10 @@ create table TARGET.result stored as parquet as 'openorgs____::15e7921fc50d9aa1229a82a84429419e', -- University Of Thessaly 'openorgs____::11f7919dadc8f8a7251af54bba60c956', -- Technical University of Crete 'openorgs____::84f0c5f5dbb6daf42748485924efde4b', -- University of Piraeus - 'openorgs____::4ac562f0376fce3539504567649cb373' -- University of Patras + 'openorgs____::4ac562f0376fce3539504567649cb373', -- University of Patras + 'openorgs____::3e8d1f8c3f6cd7f418b09f1f58b4873b', -- Aristotle University of Thessaloniki + 'openorgs____::3fcef6e1c469c10f2a84b281372c9814', -- World Bank + 'openorgs____::1698a2eb1885ef8adb5a4a969e745ad3' -- École des Ponts ParisTech ) )) foo; compute stats TARGET.result; From 3bf3127251477ea0a1ea5a3d422c55403aaf28b5 Mon Sep 17 00:00:00 2001 From: dimitrispie Date: Wed, 14 Sep 2022 16:36:19 +0300 Subject: [PATCH 3/3] Changes to monitor and indicator scripts --- .../oozie_app/scripts/step16-createIndicatorsTables.sql | 6 +++--- .../stats/oozie_app/scripts/step16_1-definitions.sql | 6 +++--- .../stats/oozie_app/scripts/step20-createMonitorDB.sql | 8 ++++---- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql index 0f294145c..b8792ae61 100755 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql @@ -76,11 +76,11 @@ compute stats indi_result_with_orcid; ---- Sprint 3 ---- create table indi_funded_result_with_fundref stored as parquet as -select distinct r.id, coalesce(fundref, 0) as fundref +select distinct r.result as id, coalesce(fundref, 0) as fundref from project_results r - left outer join (select distinct id, 1 as fundref from project_results + left outer join (select distinct result, 1 as fundref from project_results where provenance='Harvested') tmp - on r.id= tmp.id; + on r.result= tmp.result; compute stats indi_funded_result_with_fundref; diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_1-definitions.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_1-definitions.sql index 88c1ece78..41c95758c 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_1-definitions.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_1-definitions.sql @@ -3,20 +3,20 @@ ---------------------------------------------------- -- Peer reviewed: -create table ${stats_db_name}.result_peerreviewed STORED AS PARQUET as +create table IF NOT EXISTS ${stats_db_name}.result_peerreviewed STORED AS PARQUET as select r.id as id, case when doi.doi_from_crossref=1 and grey.grey_lit=0 then true else false end as peer_reviewed from ${stats_db_name}.result r left outer join ${stats_db_name}.indi_pub_doi_from_crossref doi on doi.id=r.id left outer join ${stats_db_name}.indi_pub_grey_lit grey on grey.id=r.id; -- Green OA: -create table ${stats_db_name}.result_greenoa STORED AS PARQUET as +create table IF NOT EXISTS ${stats_db_name}.result_greenoa STORED AS PARQUET as select r.id, case when green.green_oa=1 then true else false end as green from ${stats_db_name}.result r left outer join ${stats_db_name}.indi_pub_green_oa green on green.id=r.id; -- GOLD OA: -create table ${stats_db_name}.result_gold STORED AS PARQUET as +create table IF NOT EXISTS ${stats_db_name}.result_gold STORED AS PARQUET as select r.id, case when gold.is_gold=1 then true else false end as gold from ${stats_db_name}.result r left outer join ${stats_db_name}.indi_pub_gold_oa gold on gold.id=r.id; \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql index 572b70ef7..5af2e9edd 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql @@ -162,10 +162,10 @@ create table TARGET.indi_pub_doi_from_crossref stored as parquet as select * fro compute stats TARGET.indi_pub_doi_from_crossref; create table TARGET.indi_pub_gold_oa stored as parquet as select * from SOURCE.indi_pub_gold_oa orig where exists (select 1 from TARGET.result r where r.id=orig.id); compute stats TARGET.indi_pub_gold_oa; -create table TARGET.indi_datasets_gold_oa stored as parquet as select * from SOURCE.indi_datasets_gold_oa orig where exists (select 1 from TARGET.result r where r.id=orig.id); -compute stats TARGET.indi_datasets_gold_oa; -create table TARGET.indi_software_gold_oa stored as parquet as select * from SOURCE.indi_software_gold_oa orig where exists (select 1 from TARGET.result r where r.id=orig.id); -compute stats TARGET.indi_software_gold_oa; +--create table TARGET.indi_datasets_gold_oa stored as parquet as select * from SOURCE.indi_datasets_gold_oa orig where exists (select 1 from TARGET.result r where r.id=orig.id); +--compute stats TARGET.indi_datasets_gold_oa; +--create table TARGET.indi_software_gold_oa stored as parquet as select * from SOURCE.indi_software_gold_oa orig where exists (select 1 from TARGET.result r where r.id=orig.id); +--compute stats TARGET.indi_software_gold_oa; create table TARGET.indi_pub_has_abstract stored as parquet as select * from SOURCE.indi_pub_has_abstract orig where exists (select 1 from TARGET.result r where r.id=orig.id); compute stats TARGET.indi_pub_has_abstract; create table TARGET.indi_result_has_cc_licence stored as parquet as select * from SOURCE.indi_result_has_cc_licence orig where exists (select 1 from TARGET.result r where r.id=orig.id);