From 40b98d8182957618e2c0024ea2c3abe4c4a2d165 Mon Sep 17 00:00:00 2001 From: dimitrispie Date: Fri, 22 Dec 2023 10:29:20 +0200 Subject: [PATCH] Changes to indicators and funders definition - Changes result_refereed definition - Added result_country indicator - Added indi_pub_green_with_license indicator - Added country from jurisdiction to funders --- .../graph/stats/oozie_app/scripts/step15.sql | 96 ++++++++++++++++--- .../scripts/step16-createIndicatorsTables.sql | 19 +++- .../scripts/step20-createMonitorDB.sql | 4 +- .../scripts/step20-createMonitorDBAll.sql | 7 +- .../step20-createMonitorDB_institutions.sql | 3 +- .../graph/stats/oozie_app/scripts/step6.sql | 3 +- 6 files changed, 112 insertions(+), 20 deletions(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15.sql index 066b197e6..343438cf4 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15.sql @@ -8,31 +8,103 @@ DROP TABLE IF EXISTS ${stats_db_name}.publication_refereed purge; +--CREATE TABLE IF NOT EXISTS ${stats_db_name}.publication_refereed STORED AS PARQUET as +--select substr(r.id, 4) as id, inst.refereed.classname as refereed +--from ${openaire_db_name}.publication r lateral view explode(r.instance) instances as inst +--where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE; CREATE TABLE IF NOT EXISTS ${stats_db_name}.publication_refereed STORED AS PARQUET as -select substr(r.id, 4) as id, inst.refereed.classname as refereed -from ${openaire_db_name}.publication r lateral view explode(r.instance) instances as inst -where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE; +with peer_reviewed as +(select distinct substr(r.id, 4) as id, inst.refereed.classname as refereed +from ${openaire_db_name}.publication r lateral +view explode(r.instance) instances as inst +where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE +and inst.refereed.classname='peerReviewed'), +non_peer_reviewed as +(select distinct substr(r.id, 4) as id, inst.refereed.classname as refereed +from ${openaire_db_name}.publication r lateral +view explode(r.instance) instances as inst +where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE +and inst.refereed.classname='nonPeerReviewed') +select peer_reviewed.* from peer_reviewed +union all +select non_peer_reviewed.* from non_peer_reviewed +left join peer_reviewed on peer_reviewed.id=non_peer_reviewed.id +where peer_reviewed.id is null; DROP TABLE IF EXISTS ${stats_db_name}.dataset_refereed purge; +--CREATE TABLE IF NOT EXISTS ${stats_db_name}.dataset_refereed STORED AS PARQUET as +--select substr(r.id, 4) as id, inst.refereed.classname as refereed +--from ${openaire_db_name}.dataset r lateral view explode(r.instance) instances as inst +--where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE; CREATE TABLE IF NOT EXISTS ${stats_db_name}.dataset_refereed STORED AS PARQUET as -select substr(r.id, 4) as id, inst.refereed.classname as refereed -from ${openaire_db_name}.dataset r lateral view explode(r.instance) instances as inst -where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE; +with peer_reviewed as +(select distinct substr(r.id, 4) as id, inst.refereed.classname as refereed +from ${openaire_db_name}.dataset r lateral +view explode(r.instance) instances as inst +where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE +and inst.refereed.classname='peerReviewed'), +non_peer_reviewed as +(select distinct substr(r.id, 4) as id, inst.refereed.classname as refereed +from ${openaire_db_name}.dataset r lateral +view explode(r.instance) instances as inst +where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE +and inst.refereed.classname='nonPeerReviewed') +select peer_reviewed.* from peer_reviewed +union all +select non_peer_reviewed.* from non_peer_reviewed +left join peer_reviewed on peer_reviewed.id=non_peer_reviewed.id +where peer_reviewed.id is null; DROP TABLE IF EXISTS ${stats_db_name}.software_refereed purge; +--CREATE TABLE IF NOT EXISTS ${stats_db_name}.software_refereed STORED AS PARQUET as +--select substr(r.id, 4) as id, inst.refereed.classname as refereed +--from ${openaire_db_name}.software r lateral view explode(r.instance) instances as inst +--where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE; CREATE TABLE IF NOT EXISTS ${stats_db_name}.software_refereed STORED AS PARQUET as -select substr(r.id, 4) as id, inst.refereed.classname as refereed -from ${openaire_db_name}.software r lateral view explode(r.instance) instances as inst -where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE; +with peer_reviewed as +(select distinct substr(r.id, 4) as id, inst.refereed.classname as refereed +from ${openaire_db_name}.software r lateral +view explode(r.instance) instances as inst +where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE +and inst.refereed.classname='peerReviewed'), +non_peer_reviewed as +(select distinct substr(r.id, 4) as id, inst.refereed.classname as refereed +from ${openaire_db_name}.software r lateral +view explode(r.instance) instances as inst +where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE +and inst.refereed.classname='nonPeerReviewed') +select peer_reviewed.* from peer_reviewed +union all +select non_peer_reviewed.* from non_peer_reviewed +left join peer_reviewed on peer_reviewed.id=non_peer_reviewed.id +where peer_reviewed.id is null; DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_refereed purge; +--CREATE TABLE IF NOT EXISTS ${stats_db_name}.otherresearchproduct_refereed STORED AS PARQUET as +--select substr(r.id, 4) as id, inst.refereed.classname as refereed +--from ${openaire_db_name}.otherresearchproduct r lateral view explode(r.instance) instances as inst +--where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE; CREATE TABLE IF NOT EXISTS ${stats_db_name}.otherresearchproduct_refereed STORED AS PARQUET as -select substr(r.id, 4) as id, inst.refereed.classname as refereed -from ${openaire_db_name}.otherresearchproduct r lateral view explode(r.instance) instances as inst -where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE; +with peer_reviewed as +(select distinct substr(r.id, 4) as id, inst.refereed.classname as refereed +from ${openaire_db_name}.otherresearchproduct r lateral +view explode(r.instance) instances as inst +where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE +and inst.refereed.classname='peerReviewed'), +non_peer_reviewed as +(select distinct substr(r.id, 4) as id, inst.refereed.classname as refereed +from ${openaire_db_name}.otherresearchproduct r lateral +view explode(r.instance) instances as inst +where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE +and inst.refereed.classname='nonPeerReviewed') +select peer_reviewed.* from peer_reviewed +union all +select non_peer_reviewed.* from non_peer_reviewed +left join peer_reviewed on peer_reviewed.id=non_peer_reviewed.id +where peer_reviewed.id is null; CREATE VIEW IF NOT EXISTS ${stats_db_name}.result_refereed as select * from ${stats_db_name}.publication_refereed diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql index 5aa14e2c2..f8213030e 100755 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql @@ -1195,6 +1195,21 @@ and pf.publicly_funded='yes') foo) select distinct p.id, coalesce(publicly_funded, 0) as publicly_funded from ${stats_db_name}.publication p left outer join ( -select distinct ro.id, 1 as publicly_funded from result_organization ro +select distinct ro.id, 1 as publicly_funded from ${stats_db_name}.result_organization ro join ${stats_db_name}.organization o on o.id=ro.organization -join publicly_funded_orgs pfo on o.name=pfo.name) tmp on p.id=tmp.id; \ No newline at end of file +join publicly_funded_orgs pfo on o.name=pfo.name) tmp on p.id=tmp.id; + +create table ${stats_db_name}.indi_pub_green_with_license stored as parquet as +select distinct p.id, coalesce(green_with_license, 0) as green_with_license +from ${stats_db_name}.publication p +left outer join ( +select distinct p.id, 1 as green_with_license from ${stats_db_name}.publication p +join ${stats_db_name}.result_instance ri on ri.id = p.id +join ${stats_db_name}.datasource on datasource.id = ri.hostedby +where ri.license is not null and datasource.type like '%Repository%' and datasource.name!='Other') tmp +on p.id= tmp.id; + +create table ${stats_db_name}.result_country stored as parquet as +select distinct ro.id, o.country from ${stats_db_name}.result_organization ro +join ${stats_db_name}.organization o on o.id=ro.organization +join ${stats_db_name}.funder f on f.country=o.country; \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql index cc8348f26..a99a75798 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql @@ -180,4 +180,6 @@ create view TARGET.indi_funder_openess as select * from SOURCE.indi_funder_opene create view TARGET.indi_funder_findable as select * from SOURCE.indi_funder_findable; create view TARGET.indi_ris_fairness as select * from SOURCE.indi_ris_fairness; create view TARGET.indi_ris_openess as select * from SOURCE.indi_ris_openess; -create view TARGET.indi_ris_findable as select * from SOURCE.indi_ris_findable; \ No newline at end of file +create view TARGET.indi_ris_findable as select * from SOURCE.indi_ris_findable; +create table TARGET.indi_pub_green_with_license stored as parquet as select * from SOURCE.indi_pub_green_with_license orig where exists (select 1 from TARGET.result r where r.id=orig.id); +create table TARGET.result_country stored as parquet as select * from SOURCE.result_country orig where exists (select 1 from TARGET.result r where r.id=orig.id); diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDBAll.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDBAll.sql index 42812d159..671a9ea9e 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDBAll.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDBAll.sql @@ -80,8 +80,8 @@ create table TARGET.result stored as parquet as 'openorgs____::0fccc7640f0cb44d5cd1b06b312a06b9', -- Cardiff University 'openorgs____::8839b55dae0c84d56fd533f52d5d483a', -- Leibniz Institute of Ecological Urban and Regional Development 'openorgs____::526468206bca24c1c90da6a312295cf4', -- Cyprus University of Technology - 'openorgs____::b5ca9d4340e26454e367e2908ef3872f' -- Alma Mater Studiorum University of Bologna - + 'openorgs____::b5ca9d4340e26454e367e2908ef3872f', -- Alma Mater Studiorum University of Bologna + 'openorgs____::a6340e6ecf60f6bba163659df985b0f2' -- TU Dresden ) )) foo; create view if not exists TARGET.category as select * from SOURCE.category; @@ -263,5 +263,6 @@ create view TARGET.indi_funder_findable as select * from SOURCE.indi_funder_find create view TARGET.indi_ris_fairness as select * from SOURCE.indi_ris_fairness; create view TARGET.indi_ris_openess as select * from SOURCE.indi_ris_openess; create view TARGET.indi_ris_findable as select * from SOURCE.indi_ris_findable; - +create table TARGET.indi_pub_green_with_license stored as parquet as select * from SOURCE.indi_pub_green_with_license orig where exists (select 1 from TARGET.result r where r.id=orig.id); +create table TARGET.result_country stored as parquet as select * from SOURCE.result_country orig where exists (select 1 from TARGET.result r where r.id=orig.id); diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB_institutions.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB_institutions.sql index 2c0ac337c..5c347eb6b 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB_institutions.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB_institutions.sql @@ -60,5 +60,6 @@ create table TARGET.result stored as parquet as 'openorgs____::0fccc7640f0cb44d5cd1b06b312a06b9', -- Cardiff University 'openorgs____::8839b55dae0c84d56fd533f52d5d483a', -- Leibniz Institute of Ecological Urban and Regional Development 'openorgs____::526468206bca24c1c90da6a312295cf4', -- Cyprus University of Technology - 'openorgs____::b5ca9d4340e26454e367e2908ef3872f' -- Alma Mater Studiorum University of Bologna + 'openorgs____::b5ca9d4340e26454e367e2908ef3872f', -- Alma Mater Studiorum University of Bologna + 'openorgs____::a6340e6ecf60f6bba163659df985b0f2' -- TU Dresden ))) foo; \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step6.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step6.sql index e5b3f504e..165f77946 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step6.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step6.sql @@ -95,7 +95,8 @@ DROP TABLE IF EXISTS ${stats_db_name}.funder purge; create table ${stats_db_name}.funder STORED AS PARQUET as select distinct xpath_string(fund, '//funder/id') as id, xpath_string(fund, '//funder/name') as name, - xpath_string(fund, '//funder/shortname') as shortname + xpath_string(fund, '//funder/shortname') as shortname, + xpath_string(fundingtree[0].value, '//funder/jurisdiction') as country from ${openaire_db_name}.project p lateral view explode(p.fundingtree.value) fundingtree as fund; DROP TABLE IF EXISTS ${stats_db_name}.project_organization_contribution purge;