Changes to beta stats wf #300

Merged
claudio.atzori merged 4 commits from antonis.lempesis/dnet-hadoop:beta into beta 2023-06-06 11:41:38 +02:00
3 changed files with 26 additions and 4 deletions
Showing only changes of commit ebe586b1d1 - Show all commits

View File

@ -47,3 +47,7 @@ FROM (
SELECT substr(d.id, 4) id SELECT substr(d.id, 4) id
from ${openaire_db_name}.datasource d from ${openaire_db_name}.datasource d
WHERE d.datainfo.deletedbyinference=false and d.datainfo.invisible = FALSE) d on o.datasource = d.id; WHERE d.datainfo.deletedbyinference=false and d.datainfo.invisible = FALSE) d on o.datasource = d.id;
CREATE TABLE IF NOT EXISTS ${stats_db_name}.result_accessroute STORED AS PARQUET as
select distinct substr(id,4),id, accessroute from ${openaire_db_name}.result
lateral view explode (instance.accessright.openaccessroute) openaccessroute as accessroute;

View File

@ -772,3 +772,11 @@ from result p
on p.id= tmp.id; on p.id= tmp.id;
ANALYZE TABLE indi_result_with_pid COMPUTE STATISTICS; ANALYZE TABLE indi_result_with_pid COMPUTE STATISTICS;
create table if not exists indi_impact_measures as
select distinct substr(id, 4), measures_ids.id impactmetric, measures_ids.unit.value[0] score,
cast(measures_ids.unit.value[0] as decimal(6,3)) score_dec, measures_ids.unit.value[1] class
from result lateral view explode(measures) measures as measures_ids
where measures_ids.id!='views' and measures_ids.id!='downloads';
ANALYZE TABLE indi_impact_measures COMPUTE STATISTICS;

View File

@ -66,7 +66,10 @@ create table TARGET.result stored as parquet as
'openorgs____::2530baca8a15936ba2e3297f2bce2e7e', -- University of Cape Town 'openorgs____::2530baca8a15936ba2e3297f2bce2e7e', -- University of Cape Town
'openorgs____::d11f981828c485cd23d93f7f24f24db1', -- Technological University Dublin 'openorgs____::d11f981828c485cd23d93f7f24f24db1', -- Technological University Dublin
'openorgs____::5e6bf8962665cdd040341171e5c631d8', -- Delft University of Technology 'openorgs____::5e6bf8962665cdd040341171e5c631d8', -- Delft University of Technology
'openorgs____::846cb428d3f52a445f7275561a7beb5d' -- University of Manitoba 'openorgs____::846cb428d3f52a445f7275561a7beb5d', -- University of Manitoba
'openorgs____::eb391317ed0dc684aa81ac16265de041', -- Universitat Rovira i Virgili
'openorgs____::66aa9fc2fceb271423dfabcc38752dc0', -- Lund University
'openorgs____::3cff625a4370d51e08624cc586138b2f' -- IMT Atlantique
) )) foo; ) )) foo;
ANALYZE TABLE TARGET.result COMPUTE STATISTICS; ANALYZE TABLE TARGET.result COMPUTE STATISTICS;
@ -140,6 +143,9 @@ ANALYZE TABLE TARGET.result_topics COMPUTE STATISTICS;
create table TARGET.result_fos stored as parquet as select * from SOURCE.result_fos orig where exists (select 1 from TARGET.result r where r.id=orig.id); create table TARGET.result_fos stored as parquet as select * from SOURCE.result_fos orig where exists (select 1 from TARGET.result r where r.id=orig.id);
ANALYZE TABLE TARGET.result_fos COMPUTE STATISTICS; ANALYZE TABLE TARGET.result_fos COMPUTE STATISTICS;
create table TARGET.result_accessroute stored as parquet as select * from SOURCE.result_accessroute orig where exists (select 1 from TARGET.result r where r.id=orig.id);
ANALYZE TABLE TARGET.result_accessroute COMPUTE STATISTICS;
create view TARGET.foo1 as select * from SOURCE.result_result rr where rr.source in (select id from TARGET.result); create view TARGET.foo1 as select * from SOURCE.result_result rr where rr.source in (select id from TARGET.result);
create view TARGET.foo2 as select * from SOURCE.result_result rr where rr.target in (select id from TARGET.result); create view TARGET.foo2 as select * from SOURCE.result_result rr where rr.target in (select id from TARGET.result);
create table TARGET.result_result STORED AS PARQUET as select distinct * from (select * from TARGET.foo1 union all select * from TARGET.foo2) foufou; create table TARGET.result_result STORED AS PARQUET as select distinct * from (select * from TARGET.foo1 union all select * from TARGET.foo2) foufou;
@ -213,6 +219,8 @@ ANALYZE TABLE TARGET.indi_result_no_of_copies COMPUTE STATISTICS;
---- Sprint 6 ---- ---- Sprint 6 ----
create table TARGET.indi_pub_hybrid_oa_with_cc stored as parquet as select * from SOURCE.indi_pub_hybrid_oa_with_cc orig where exists (select 1 from TARGET.result r where r.id=orig.id); create table TARGET.indi_pub_hybrid_oa_with_cc stored as parquet as select * from SOURCE.indi_pub_hybrid_oa_with_cc orig where exists (select 1 from TARGET.result r where r.id=orig.id);
ANALYZE TABLE TARGET.indi_pub_hybrid_oa_with_cc COMPUTE STATISTICS; ANALYZE TABLE TARGET.indi_pub_hybrid_oa_with_cc COMPUTE STATISTICS;
create table TARGET.indi_pub_bronze_oa stored as parquet as select * from SOURCE.indi_pub_bronze_oa orig where exists (select 1 from TARGET.result r where r.id=orig.id);
ANALYZE TABLE TARGET.indi_pub_bronze_oa COMPUTE STATISTICS;
create table TARGET.indi_pub_downloads stored as parquet as select * from SOURCE.indi_pub_downloads orig where exists (select 1 from TARGET.result r where r.id=orig.result_id); create table TARGET.indi_pub_downloads stored as parquet as select * from SOURCE.indi_pub_downloads orig where exists (select 1 from TARGET.result r where r.id=orig.result_id);
ANALYZE TABLE TARGET.indi_pub_downloads COMPUTE STATISTICS; ANALYZE TABLE TARGET.indi_pub_downloads COMPUTE STATISTICS;
create table TARGET.indi_pub_downloads_datasource stored as parquet as select * from SOURCE.indi_pub_downloads_datasource orig where exists (select 1 from TARGET.result r where r.id=orig.result_id); create table TARGET.indi_pub_downloads_datasource stored as parquet as select * from SOURCE.indi_pub_downloads_datasource orig where exists (select 1 from TARGET.result r where r.id=orig.result_id);
@ -241,3 +249,5 @@ create table TARGET.indi_pub_in_subscribed stored as parquet as select * from SO
ANALYZE TABLE TARGET.indi_pub_in_subscribed COMPUTE STATISTICS; ANALYZE TABLE TARGET.indi_pub_in_subscribed COMPUTE STATISTICS;
create table TARGET.indi_result_with_pid stored as parquet as select * from SOURCE.indi_result_with_pid orig where exists (select 1 from TARGET.result r where r.id=orig.id); create table TARGET.indi_result_with_pid stored as parquet as select * from SOURCE.indi_result_with_pid orig where exists (select 1 from TARGET.result r where r.id=orig.id);
ANALYZE TABLE TARGET.indi_result_with_pid COMPUTE STATISTICS; ANALYZE TABLE TARGET.indi_result_with_pid COMPUTE STATISTICS;
create table TARGET.indi_impact_measures stored as parquet as select * from SOURCE.indi_impact_measures orig where exists (select 1 from TARGET.result r where r.id=orig.id);
ANALYZE TABLE TARGET.indi_impact_measures COMPUTE STATISTICS;