From dcd85f8cd79ab8342e03eca6ef9530f35dd242ab Mon Sep 17 00:00:00 2001 From: dimitrispie Date: Thu, 22 Sep 2022 13:33:07 +0300 Subject: [PATCH] - Synchronize indicators in stats-db with monitor-db - added new openorg id for Nanyang Technological University - changed openorg id for University of Helsinki #8088 ticket --- .../scripts/step16-createIndicatorsTables.sql | 35 +++++++++++ .../scripts/step20-createMonitorDB.sql | 61 ++++++++++++------- 2 files changed, 74 insertions(+), 22 deletions(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql index b8792ae615..417ed6e4e7 100755 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql @@ -92,6 +92,17 @@ compute stats indi_funded_result_with_fundref; -- -- compute stats indi_result_org_collab; -- +create table indi_result_org_collab stored as parquet as +with tmp as ( +select distinct ro.organization organization, ro.id from result_organization ro +join organization o on o.id=ro.organization where o.name is not null) +select o1.organization org1, o2.organization org2, count(o1.id) as collaborations +from tmp as o1 +join tmp as o2 on o1.id=o2.id and o1.organization!=o2.organization +group by org1, org2; + +compute stats indi_result_org_collab; + -- create table indi_result_org_country_collab stored as parquet as -- with tmp as -- (select o.id as id, o.country , ro.id as result,r.type from organization o @@ -105,6 +116,17 @@ compute stats indi_funded_result_with_fundref; -- -- compute stats indi_result_org_country_collab; -- +create table indi_result_org_country_collab stored as parquet as +with tmp as +(select distinct ro.organization organization, ro.id, o.country from result_organization ro +join organization o on o.id=ro.organization where country <> 'UNKNOWN' and o.name is not null) +select o1.organization org1,o2.country country2, count(o1.id) as collaborations +from tmp as o1 join tmp as o2 on o1.id=o2.id +where o1.id=o2.id and o1.country!=o2.country +group by o1.organization, o1.id, o2.country; + +compute stats indi_result_org_country_collab; + -- create table indi_result_org_collab stored as parquet as -- with tmp as -- (select o.id, ro.id as result,r.type from organization o @@ -166,6 +188,19 @@ compute stats indi_funder_country_collab; -- -- compute stats indi_result_country_collab; +create table indi_result_country_collab stored as parquet as +with tmp as + (select distinct country, ro.id as result from organization o + join result_organization ro on o.id=ro.organization + where country <> 'UNKNOWN' and o.name is not null) +select o1.country country1, o2.country country2, count(o1.result) as collaborations +from tmp as o1 + join tmp as o2 on o1.result=o2.result +where o1.country<>o2.country +group by o1.country, o2.country; + +compute stats indi_result_country_collab; + ---- Sprint 4 ---- create table indi_pub_diamond stored as parquet as select distinct pd.id, coalesce(in_diamond_journal, 0) as in_diamond_journal diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql index 5af2e9edd5..290acbf9fc 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql @@ -23,7 +23,7 @@ create table TARGET.result stored as parquet as 'openorgs____::d2a09b9d5eabb10c95f9470e172d05d2', --??? Not exists ?? 'openorgs____::d169c7407dd417152596908d48c11460', --Masaryk University 'openorgs____::1ec924b1759bb16d0a02f2dad8689b21', --University of Belgrade - 'openorgs____::2fb1e47b4612688d9de9169d579939a7', --University of Helsinki + 'openorgs____::0ae431b820e4c33db8967fbb2b919150', --University of Helsinki 'openorgs____::759d59f05d77188faee99b7493b46805', --University of Minho 'openorgs____::cad284878801b9465fa51a95b1d779db', --Universidad Politécnica de Madrid 'openorgs____::eadc8da90a546e98c03f896661a2e4d4', --University of Göttingen @@ -48,7 +48,8 @@ create table TARGET.result stored as parquet as 'openorgs____::4ac562f0376fce3539504567649cb373', -- University of Patras 'openorgs____::3e8d1f8c3f6cd7f418b09f1f58b4873b', -- Aristotle University of Thessaloniki 'openorgs____::3fcef6e1c469c10f2a84b281372c9814', -- World Bank - 'openorgs____::1698a2eb1885ef8adb5a4a969e745ad3' -- École des Ponts ParisTech + 'openorgs____::1698a2eb1885ef8adb5a4a969e745ad3', -- École des Ponts ParisTech + 'openorgs____::e15adb13c4dadd49de4d35c39b5da93a' -- Nanyang Technological University ) )) foo; compute stats TARGET.result; @@ -154,50 +155,44 @@ create table TARGET.project_results stored as parquet as select id as result, pr compute stats TARGET.project_results; -- indicators +-- Sprint 1 ---- create table TARGET.indi_pub_green_oa stored as parquet as select * from SOURCE.indi_pub_green_oa orig where exists (select 1 from TARGET.result r where r.id=orig.id); compute stats TARGET.indi_pub_green_oa; create table TARGET.indi_pub_grey_lit stored as parquet as select * from SOURCE.indi_pub_grey_lit orig where exists (select 1 from TARGET.result r where r.id=orig.id); compute stats TARGET.indi_pub_grey_lit; create table TARGET.indi_pub_doi_from_crossref stored as parquet as select * from SOURCE.indi_pub_doi_from_crossref orig where exists (select 1 from TARGET.result r where r.id=orig.id); compute stats TARGET.indi_pub_doi_from_crossref; -create table TARGET.indi_pub_gold_oa stored as parquet as select * from SOURCE.indi_pub_gold_oa orig where exists (select 1 from TARGET.result r where r.id=orig.id); -compute stats TARGET.indi_pub_gold_oa; ---create table TARGET.indi_datasets_gold_oa stored as parquet as select * from SOURCE.indi_datasets_gold_oa orig where exists (select 1 from TARGET.result r where r.id=orig.id); ---compute stats TARGET.indi_datasets_gold_oa; ---create table TARGET.indi_software_gold_oa stored as parquet as select * from SOURCE.indi_software_gold_oa orig where exists (select 1 from TARGET.result r where r.id=orig.id); ---compute stats TARGET.indi_software_gold_oa; -create table TARGET.indi_pub_has_abstract stored as parquet as select * from SOURCE.indi_pub_has_abstract orig where exists (select 1 from TARGET.result r where r.id=orig.id); -compute stats TARGET.indi_pub_has_abstract; +-- Sprint 2 ---- create table TARGET.indi_result_has_cc_licence stored as parquet as select * from SOURCE.indi_result_has_cc_licence orig where exists (select 1 from TARGET.result r where r.id=orig.id); compute stats TARGET.indi_result_has_cc_licence; create table TARGET.indi_result_has_cc_licence_url stored as parquet as select * from SOURCE.indi_result_has_cc_licence_url orig where exists (select 1 from TARGET.result r where r.id=orig.id); compute stats TARGET.indi_result_has_cc_licence_url; - -create view TARGET.indi_funder_country_collab as select * from SOURCE.indi_funder_country_collab; -create view TARGET.indi_project_collab_org as select * from SOURCE.indi_project_collab_org; -create view TARGET.indi_project_collab_org_country as select * from SOURCE.indi_project_collab_org_country; - +create table TARGET.indi_pub_has_abstract stored as parquet as select * from SOURCE.indi_pub_has_abstract orig where exists (select 1 from TARGET.result r where r.id=orig.id); +compute stats TARGET.indi_pub_has_abstract; create table TARGET.indi_result_with_orcid stored as parquet as select * from SOURCE.indi_result_with_orcid orig where exists (select 1 from TARGET.result r where r.id=orig.id); compute stats TARGET.indi_result_with_orcid; +---- Sprint 3 ---- create table TARGET.indi_funded_result_with_fundref stored as parquet as select * from SOURCE.indi_funded_result_with_fundref orig where exists (select 1 from TARGET.result r where r.id=orig.id); compute stats TARGET.indi_funded_result_with_fundref; +create view TARGET.indi_result_org_collab as select * from SOURCE.indi_result_org_collab; +create view TARGET.indi_result_org_country_collab as select * from SOURCE.indi_result_org_country_collab; +create view TARGET.indi_project_collab_org as select * from SOURCE.indi_project_collab_org; +create view TARGET.indi_project_collab_org_country as select * from SOURCE.indi_project_collab_org_country; +create view TARGET.indi_funder_country_collab as select * from SOURCE.indi_funder_country_collab; +create view TARGET.indi_result_country_collab as select * from SOURCE.indi_result_country_collab; +---- Sprint 4 ---- create table TARGET.indi_pub_diamond stored as parquet as select * from SOURCE.indi_pub_diamond orig where exists (select 1 from TARGET.result r where r.id=orig.id); compute stats TARGET.indi_pub_diamond; -create table TARGET.indi_pub_hybrid stored as parquet as select * from SOURCE.indi_pub_hybrid orig where exists (select 1 from TARGET.result r where r.id=orig.id); -compute stats TARGET.indi_pub_hybrid; create table TARGET.indi_pub_in_transformative stored as parquet as select * from SOURCE.indi_pub_in_transformative orig where exists (select 1 from TARGET.result r where r.id=orig.id); compute stats TARGET.indi_pub_in_transformative; create table TARGET.indi_pub_closed_other_open stored as parquet as select * from SOURCE.indi_pub_closed_other_open orig where exists (select 1 from TARGET.result r where r.id=orig.id); compute stats TARGET.indi_pub_closed_other_open; - +---- Sprint 5 ---- create table TARGET.indi_result_no_of_copies stored as parquet as select * from SOURCE.indi_result_no_of_copies orig where exists (select 1 from TARGET.result r where r.id=orig.id); compute stats TARGET.indi_result_no_of_copies; - -create view TARGET.indi_org_findable as select * from SOURCE.indi_org_findable; -create view TARGET.indi_org_openess as select * from SOURCE.indi_org_openess; +---- Sprint 6 ---- create table TARGET.indi_pub_hybrid_oa_with_cc stored as parquet as select * from SOURCE.indi_pub_hybrid_oa_with_cc orig where exists (select 1 from TARGET.result r where r.id=orig.id); compute stats TARGET.indi_pub_hybrid_oa_with_cc; - create table TARGET.indi_pub_downloads stored as parquet as select * from SOURCE.indi_pub_downloads orig where exists (select 1 from TARGET.result r where r.id=orig.result_id); compute stats TARGET.indi_pub_downloads; create table TARGET.indi_pub_downloads_datasource stored as parquet as select * from SOURCE.indi_pub_downloads_datasource orig where exists (select 1 from TARGET.result r where r.id=orig.result_id); @@ -206,6 +201,28 @@ create table TARGET.indi_pub_downloads_year stored as parquet as select * from S compute stats TARGET.indi_pub_downloads_year; create table TARGET.indi_pub_downloads_datasource_year stored as parquet as select * from SOURCE.indi_pub_downloads_datasource_year orig where exists (select 1 from TARGET.result r where r.id=orig.result_id); compute stats TARGET.indi_pub_downloads_datasource_year; +---- Sprint 7 ---- +create table TARGET.indi_pub_gold_oa stored as parquet as select * from SOURCE.indi_pub_gold_oa orig where exists (select 1 from TARGET.result r where r.id=orig.id); +compute stats TARGET.indi_pub_gold_oa; +create table TARGET.indi_pub_hybrid stored as parquet as select * from SOURCE.indi_pub_hybrid orig where exists (select 1 from TARGET.result r where r.id=orig.id); +compute stats TARGET.indi_pub_hybrid; +create view TARGET.indi_org_fairness as select * from SOURCE.indi_org_fairness; +create view TARGET.indi_org_fairness_pub_pr as select * from SOURCE.indi_org_fairness_pub_pr; +create view TARGET.indi_org_fairness_pub_year as select * from SOURCE.indi_org_fairness_pub_year; +create view TARGET.indi_org_fairness_pub as select * from SOURCE.indi_org_fairness_pub; +create view TARGET.indi_org_fairness_year as select * from SOURCE.indi_org_fairness_year; +create view TARGET.indi_org_findable_year as select * from SOURCE.indi_org_findable_year; +create view TARGET.indi_org_findable as select * from SOURCE.indi_org_findable; +create view TARGET.indi_org_openess as select * from SOURCE.indi_org_openess; +create view TARGET.indi_org_openess_year as select * from SOURCE.indi_org_openess_year; +create table TARGET.indi_pub_has_preprint stored as parquet as select * from SOURCE.indi_pub_has_preprint orig where exists (select 1 from TARGET.result r where r.id=orig.id); +create table TARGET.indi_pub_in_subscribed stored as parquet as select * from SOURCE.indi_pub_in_subscribed orig where exists (select 1 from TARGET.result r where r.id=orig.id); +create table TARGET.indi_result_with_pid stored as parquet as select * from SOURCE.indi_result_with_pid orig where exists (select 1 from TARGET.result r where r.id=orig.id); + +--create table TARGET.indi_datasets_gold_oa stored as parquet as select * from SOURCE.indi_datasets_gold_oa orig where exists (select 1 from TARGET.result r where r.id=orig.id); +--compute stats TARGET.indi_datasets_gold_oa; +--create table TARGET.indi_software_gold_oa stored as parquet as select * from SOURCE.indi_software_gold_oa orig where exists (select 1 from TARGET.result r where r.id=orig.id); +--compute stats TARGET.indi_software_gold_oa; --denorm alter table TARGET.result rename to TARGET.res_tmp;