From ebe586b1d14a8f49c01354fe09b496e7cb206c44 Mon Sep 17 00:00:00 2001 From: dimitrispie Date: Fri, 26 May 2023 10:25:28 +0300 Subject: [PATCH] Impact indicators/Unpaywall - Added Impact indicators - Added unpaywall open access colours --- .../oa/graph/stats/oozie_app/scripts/step14.sql | 6 +++++- .../scripts/step16-createIndicatorsTables.sql | 8 ++++++++ .../oozie_app/scripts/step20-createMonitorDB.sql | 16 +++++++++++++--- 3 files changed, 26 insertions(+), 4 deletions(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step14.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step14.sql index 47a6f84c2..dc9e6c1f9 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step14.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step14.sql @@ -46,4 +46,8 @@ FROM ( LEFT OUTER JOIN ( SELECT substr(d.id, 4) id from ${openaire_db_name}.datasource d - WHERE d.datainfo.deletedbyinference=false and d.datainfo.invisible = FALSE) d on o.datasource = d.id; \ No newline at end of file + WHERE d.datainfo.deletedbyinference=false and d.datainfo.invisible = FALSE) d on o.datasource = d.id; + +CREATE TABLE IF NOT EXISTS ${stats_db_name}.result_accessroute STORED AS PARQUET as +select distinct substr(id,4),id, accessroute from ${openaire_db_name}.result +lateral view explode (instance.accessright.openaccessroute) openaccessroute as accessroute; diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql index 7e560684b..ef573916f 100755 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql @@ -772,3 +772,11 @@ from result p on p.id= tmp.id; ANALYZE TABLE indi_result_with_pid COMPUTE STATISTICS; + +create table if not exists indi_impact_measures as +select distinct substr(id, 4), measures_ids.id impactmetric, measures_ids.unit.value[0] score, +cast(measures_ids.unit.value[0] as decimal(6,3)) score_dec, measures_ids.unit.value[1] class +from result lateral view explode(measures) measures as measures_ids +where measures_ids.id!='views' and measures_ids.id!='downloads'; + +ANALYZE TABLE indi_impact_measures COMPUTE STATISTICS; \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql index bc72b6c15..86b5c7ca1 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql @@ -64,9 +64,12 @@ create table TARGET.result stored as parquet as 'openorgs____::38d7097854736583dde879d12dacafca' -- Brown University 'openorgs____::57784c9e047e826fefdb1ef816120d92', --Arts et Métiers ParisTech 'openorgs____::2530baca8a15936ba2e3297f2bce2e7e', -- University of Cape Town - 'openorgs____::d11f981828c485cd23d93f7f24f24db1', -- Technological University Dublin - 'openorgs____::5e6bf8962665cdd040341171e5c631d8', -- Delft University of Technology - 'openorgs____::846cb428d3f52a445f7275561a7beb5d' -- University of Manitoba + 'openorgs____::d11f981828c485cd23d93f7f24f24db1', -- Technological University Dublin + 'openorgs____::5e6bf8962665cdd040341171e5c631d8', -- Delft University of Technology + 'openorgs____::846cb428d3f52a445f7275561a7beb5d', -- University of Manitoba + 'openorgs____::eb391317ed0dc684aa81ac16265de041', -- Universitat Rovira i Virgili + 'openorgs____::66aa9fc2fceb271423dfabcc38752dc0', -- Lund University + 'openorgs____::3cff625a4370d51e08624cc586138b2f' -- IMT Atlantique ) )) foo; ANALYZE TABLE TARGET.result COMPUTE STATISTICS; @@ -140,6 +143,9 @@ ANALYZE TABLE TARGET.result_topics COMPUTE STATISTICS; create table TARGET.result_fos stored as parquet as select * from SOURCE.result_fos orig where exists (select 1 from TARGET.result r where r.id=orig.id); ANALYZE TABLE TARGET.result_fos COMPUTE STATISTICS; +create table TARGET.result_accessroute stored as parquet as select * from SOURCE.result_accessroute orig where exists (select 1 from TARGET.result r where r.id=orig.id); +ANALYZE TABLE TARGET.result_accessroute COMPUTE STATISTICS; + create view TARGET.foo1 as select * from SOURCE.result_result rr where rr.source in (select id from TARGET.result); create view TARGET.foo2 as select * from SOURCE.result_result rr where rr.target in (select id from TARGET.result); create table TARGET.result_result STORED AS PARQUET as select distinct * from (select * from TARGET.foo1 union all select * from TARGET.foo2) foufou; @@ -213,6 +219,8 @@ ANALYZE TABLE TARGET.indi_result_no_of_copies COMPUTE STATISTICS; ---- Sprint 6 ---- create table TARGET.indi_pub_hybrid_oa_with_cc stored as parquet as select * from SOURCE.indi_pub_hybrid_oa_with_cc orig where exists (select 1 from TARGET.result r where r.id=orig.id); ANALYZE TABLE TARGET.indi_pub_hybrid_oa_with_cc COMPUTE STATISTICS; +create table TARGET.indi_pub_bronze_oa stored as parquet as select * from SOURCE.indi_pub_bronze_oa orig where exists (select 1 from TARGET.result r where r.id=orig.id); +ANALYZE TABLE TARGET.indi_pub_bronze_oa COMPUTE STATISTICS; create table TARGET.indi_pub_downloads stored as parquet as select * from SOURCE.indi_pub_downloads orig where exists (select 1 from TARGET.result r where r.id=orig.result_id); ANALYZE TABLE TARGET.indi_pub_downloads COMPUTE STATISTICS; create table TARGET.indi_pub_downloads_datasource stored as parquet as select * from SOURCE.indi_pub_downloads_datasource orig where exists (select 1 from TARGET.result r where r.id=orig.result_id); @@ -241,3 +249,5 @@ create table TARGET.indi_pub_in_subscribed stored as parquet as select * from SO ANALYZE TABLE TARGET.indi_pub_in_subscribed COMPUTE STATISTICS; create table TARGET.indi_result_with_pid stored as parquet as select * from SOURCE.indi_result_with_pid orig where exists (select 1 from TARGET.result r where r.id=orig.id); ANALYZE TABLE TARGET.indi_result_with_pid COMPUTE STATISTICS; +create table TARGET.indi_impact_measures stored as parquet as select * from SOURCE.indi_impact_measures orig where exists (select 1 from TARGET.result r where r.id=orig.id); +ANALYZE TABLE TARGET.indi_impact_measures COMPUTE STATISTICS;