From b920307bdd733d8a9a97067888a89cdec6bdbc46 Mon Sep 17 00:00:00 2001
From: dimitrispie
Date: Tue, 9 Jan 2024 00:47:09 +0200
Subject: [PATCH] Changes to indicators

step16-createIndicatorsTables.sql:
- CC-licence detection: the second LIKE branch now tests the raw
  license.type against '%CC%' (case-sensitive) instead of
  lower(license.type) against '%cc-%'.
- Publicly-funded lookup: the third UNION ALL branch now returns op.name
  (from org_names_pids) instead of pf.name.
- result_country: take the organization country and fall back to the
  country of the funder of a linked project; rows with no country are
  dropped.
- New indicator tables: indi_result_oa_with_license,
  indi_result_oa_without_license, indi_result_under_transformative.
- Each table created in the last hunk is preceded by
  "drop table if exists ... purge", as done elsewhere in this script, so
  the step does not fail when the table already exists.

step20-createMonitorDB.sql, step20-createMonitorDBAll.sql:
- Copy the three new indicator tables into TARGET, restricted to ids
  present in TARGET.result.

step21-createObservatoryDB.sql:
- result_cc_licence counts licences whose result_licenses.type starts
  with 'CC'; the join to licenses_normalized is removed.
---
Review notes (ignored by git am):
- '%CC%' in step16 is unanchored and case-sensitive, while step21 uses
  'CC%' on the same column; confirm the looser match is intended.
- result_country is driven from result_organization, so a result with a
  funded project but no result_organization row gets no country; confirm.
- Leading whitespace of context lines was reconstructed; if it does not
  match the tree, apply with "git apply --ignore-whitespace".
- The post-image blob id on the step16 "index" line was not recomputed
  after adding the drop statements.

 .../scripts/step16-createIndicatorsTables.sql | 44 ++++++++++++++++---
 .../scripts/step20-createMonitorDB.sql        |  6 +++
 .../scripts/step20-createMonitorDBAll.sql     |  6 +++
 .../scripts/step21-createObservatoryDB.sql    |  3 +-
 4 files changed, 52 insertions(+), 7 deletions(-)

diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql
index f8213030e..eab91c99c 100755
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql
@@ -58,7 +58,7 @@ select distinct r.id, (case when lic='' or lic is null then 0 else 1 end) as has
 from ${stats_db_name}.result r
 left outer join (select r.id, license.type as lic from ${stats_db_name}.result r
                  join ${stats_db_name}.result_licenses as license on license.id = r.id
-                 where lower(license.type) LIKE '%creativecommons.org%' OR lower(license.type) LIKE '%cc-%') tmp
+                 where lower(license.type) LIKE '%creativecommons.org%' OR license.type LIKE '%CC%') tmp
 on r.id= tmp.id;
 
 drop table if exists ${stats_db_name}.indi_result_has_cc_licence_url purge;
@@ -1189,7 +1189,7 @@ union all
 select pf.name from stats_ext.insitutions_for_publicly_funded pf
 join ${stats_db_name}.project p on p.funder=pf.name
 union all
-select pf.name from stats_ext.insitutions_for_publicly_funded pf
+select op.name from stats_ext.insitutions_for_publicly_funded pf
 join org_names_pids op on (op.name=pf.name or op.pid=pf.ror)
 and pf.publicly_funded='yes') foo)
 select distinct p.id, coalesce(publicly_funded, 0) as publicly_funded
@@ -1210,6 +1210,40 @@ where ri.license is not null and datasource.type like '%Repository%' and datasou
 on p.id= tmp.id;
 
+drop table if exists ${stats_db_name}.result_country purge;
 create table ${stats_db_name}.result_country stored as parquet as
-select distinct ro.id, o.country from ${stats_db_name}.result_organization ro
-join ${stats_db_name}.organization o on o.id=ro.organization
-join ${stats_db_name}.funder f on f.country=o.country;
\ No newline at end of file
+select distinct ro.id, coalesce(o.country, f.country) as country
+from ${stats_db_name}.result_organization ro
+left outer join ${stats_db_name}.organization o on o.id=ro.organization
+left outer join ${stats_db_name}.result_projects rp on rp.id=ro.id
+left outer join ${stats_db_name}.project p on p.id=rp.project
+left outer join ${stats_db_name}.funder f on f.name=p.funder
+where coalesce(o.country, f.country) IS NOT NULL;
+
+drop table if exists ${stats_db_name}.indi_result_oa_with_license purge;
+create table ${stats_db_name}.indi_result_oa_with_license stored as parquet as
+select distinct r.id, coalesce(oa_with_license,0) as oa_with_license
+from ${stats_db_name}.result r
+left outer join (select distinct r.id, 1 as oa_with_license from ${stats_db_name}.result r
+join ${stats_db_name}.result_licenses rl on rl.id=r.id where r.bestlicence='Open Access') tmp on r.id=tmp.id;
+
+drop table if exists ${stats_db_name}.indi_result_oa_without_license purge;
+create table ${stats_db_name}.indi_result_oa_without_license stored as parquet as
+with without_license as
+(select distinct id from ${stats_db_name}.indi_result_oa_with_license
+where oa_with_license=0)
+select distinct r.id, coalesce(oa_without_license,0) as oa_without_license
+from ${stats_db_name}.result r
+left outer join (select distinct r.id, 1 as oa_without_license
+from ${stats_db_name}.result r
+join without_license wl on wl.id=r.id
+where r.bestlicence='Open Access') tmp on r.id=tmp.id;
+
+drop table if exists ${stats_db_name}.indi_result_under_transformative purge;
+create table ${stats_db_name}.indi_result_under_transformative stored as parquet as
+with transformative_dois as
+(select distinct doi from stats_ext.transformative_facts)
+select distinct r.id, coalesce(under_transformative,0) as under_transformative
+from ${stats_db_name}.result r
+left outer join (select distinct rp.id, 1 as under_transformative
+from ${stats_db_name}.result_pids rp join ${stats_db_name}.result r on r.id=rp.id
+join transformative_dois td on td.doi=rp.pid) tmp on r.id=tmp.id;
\ No newline at end of file
diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql
index a99a75798..4abb6bdbc 100644
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql
@@ -181,5 +181,11 @@ create view TARGET.indi_funder_findable as select * from SOURCE.indi_funder_find
 create view TARGET.indi_ris_fairness as select * from SOURCE.indi_ris_fairness;
 create view TARGET.indi_ris_openess as select * from SOURCE.indi_ris_openess;
 create view TARGET.indi_ris_findable as select * from SOURCE.indi_ris_findable;
+
 create table TARGET.indi_pub_green_with_license stored as parquet as select * from SOURCE.indi_pub_green_with_license orig where exists (select 1 from TARGET.result r where r.id=orig.id);
 create table TARGET.result_country stored as parquet as select * from SOURCE.result_country orig where exists (select 1 from TARGET.result r where r.id=orig.id);
+
+create table TARGET.indi_result_oa_with_license stored as parquet as select * from SOURCE.indi_result_oa_with_license orig where exists (select 1 from TARGET.result r where r.id=orig.id);
+create table TARGET.indi_result_oa_without_license stored as parquet as select * from SOURCE.indi_result_oa_without_license orig where exists (select 1 from TARGET.result r where r.id=orig.id);
+
+create table TARGET.indi_result_under_transformative stored as parquet as select * from SOURCE.indi_result_under_transformative orig where exists (select 1 from TARGET.result r where r.id=orig.id);
diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDBAll.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDBAll.sql
index 671a9ea9e..b4f87a184 100644
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDBAll.sql
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDBAll.sql
@@ -263,6 +263,12 @@ create view TARGET.indi_funder_findable as select * from SOURCE.indi_funder_find
 create view TARGET.indi_ris_fairness as select * from SOURCE.indi_ris_fairness;
 create view TARGET.indi_ris_openess as select * from SOURCE.indi_ris_openess;
 create view TARGET.indi_ris_findable as select * from SOURCE.indi_ris_findable;
+
 create table TARGET.indi_pub_green_with_license stored as parquet as select * from SOURCE.indi_pub_green_with_license orig where exists (select 1 from TARGET.result r where r.id=orig.id);
 create table TARGET.result_country stored as parquet as select * from SOURCE.result_country orig where exists (select 1 from TARGET.result r where r.id=orig.id);
 
+create table TARGET.indi_result_oa_with_license stored as parquet as select * from SOURCE.indi_result_oa_with_license orig where exists (select 1 from TARGET.result r where r.id=orig.id);
+create table TARGET.indi_result_oa_without_license stored as parquet as select * from SOURCE.indi_result_oa_without_license orig where exists (select 1 from TARGET.result r where r.id=orig.id);
+
+create table TARGET.indi_result_under_transformative stored as parquet as select * from SOURCE.indi_result_under_transformative orig where exists (select 1 from TARGET.result r where r.id=orig.id);
+
diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step21-createObservatoryDB.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step21-createObservatoryDB.sql
index 2e6f0711c..66620ac38 100644
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step21-createObservatoryDB.sql
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step21-createObservatoryDB.sql
@@ -2,9 +2,8 @@ create table ${observatory_db_name}.result_cc_licence stored as parquet as
 select r.id, coalesce(rln.count, 0) > 0 as cc_licence
 from ${stats_db_name}.result r
     left outer join (
-        select rl.id, sum(case when lower(rln.normalized) like 'cc-%' then 1 else 0 end) as count
+        select rl.id, sum(case when rl.type like 'CC%' then 1 else 0 end) as count
         from ${stats_db_name}.result_licenses rl
-        left outer join ${stats_db_name}.licenses_normalized rln on rl.type=rln.license
     group by rl.id
 ) rln on rln.id=r.id;
 