[Stats wf] #372, #405 to production #406

Merged
claudio.atzori merged 28 commits from antonis.lempesis/dnet-hadoop:beta into master 2024-03-26 12:18:27 +01:00
6 changed files with 112 additions and 20 deletions
Showing only changes of commit 40b98d8182 - Show all commits

View File

@ -8,31 +8,103 @@
-- Rebuild publication_refereed with at most one row per publication id.
-- An id with any instance classified 'peerReviewed' is reported as
-- 'peerReviewed'; 'nonPeerReviewed' is reported only when no instance of the
-- same id is peer reviewed. Instances with any other classification produce
-- no row. (The previous version of this statement emitted one row per
-- instance, without this precedence rule.)
DROP TABLE IF EXISTS ${stats_db_name}.publication_refereed purge;

CREATE TABLE IF NOT EXISTS ${stats_db_name}.publication_refereed STORED AS PARQUET as
with reviewed as (
    -- ids having at least one peer-reviewed instance
    select distinct
        substr(res.id, 4) as id,
        i.refereed.classname as refereed
    from ${openaire_db_name}.publication res
    lateral view explode(res.instance) exploded as i
    where res.datainfo.deletedbyinference = false
      and res.datainfo.invisible = FALSE
      and i.refereed.classname = 'peerReviewed'
),
not_reviewed as (
    -- ids having at least one instance explicitly marked as not peer reviewed
    select distinct
        substr(res.id, 4) as id,
        i.refereed.classname as refereed
    from ${openaire_db_name}.publication res
    lateral view explode(res.instance) exploded as i
    where res.datainfo.deletedbyinference = false
      and res.datainfo.invisible = FALSE
      and i.refereed.classname = 'nonPeerReviewed'
)
select reviewed.id, reviewed.refereed
from reviewed
union all
-- anti-join: keep a non-peer-reviewed id only if it never appears as peer reviewed
select not_reviewed.id, not_reviewed.refereed
from not_reviewed
left join reviewed on reviewed.id = not_reviewed.id
where reviewed.id is null;
-- Rebuild dataset_refereed with at most one row per dataset id.
-- An id with any instance classified 'peerReviewed' is reported as
-- 'peerReviewed'; 'nonPeerReviewed' is reported only when no instance of the
-- same id is peer reviewed. Instances with any other classification produce
-- no row. (The previous version of this statement emitted one row per
-- instance, without this precedence rule.)
DROP TABLE IF EXISTS ${stats_db_name}.dataset_refereed purge;

CREATE TABLE IF NOT EXISTS ${stats_db_name}.dataset_refereed STORED AS PARQUET as
with reviewed as (
    -- ids having at least one peer-reviewed instance
    select distinct
        substr(res.id, 4) as id,
        i.refereed.classname as refereed
    from ${openaire_db_name}.dataset res
    lateral view explode(res.instance) exploded as i
    where res.datainfo.deletedbyinference = false
      and res.datainfo.invisible = FALSE
      and i.refereed.classname = 'peerReviewed'
),
not_reviewed as (
    -- ids having at least one instance explicitly marked as not peer reviewed
    select distinct
        substr(res.id, 4) as id,
        i.refereed.classname as refereed
    from ${openaire_db_name}.dataset res
    lateral view explode(res.instance) exploded as i
    where res.datainfo.deletedbyinference = false
      and res.datainfo.invisible = FALSE
      and i.refereed.classname = 'nonPeerReviewed'
)
select reviewed.id, reviewed.refereed
from reviewed
union all
-- anti-join: keep a non-peer-reviewed id only if it never appears as peer reviewed
select not_reviewed.id, not_reviewed.refereed
from not_reviewed
left join reviewed on reviewed.id = not_reviewed.id
where reviewed.id is null;
-- Rebuild software_refereed with at most one row per software id.
-- An id with any instance classified 'peerReviewed' is reported as
-- 'peerReviewed'; 'nonPeerReviewed' is reported only when no instance of the
-- same id is peer reviewed. Instances with any other classification produce
-- no row. (The previous version of this statement emitted one row per
-- instance, without this precedence rule.)
DROP TABLE IF EXISTS ${stats_db_name}.software_refereed purge;

CREATE TABLE IF NOT EXISTS ${stats_db_name}.software_refereed STORED AS PARQUET as
with reviewed as (
    -- ids having at least one peer-reviewed instance
    select distinct
        substr(res.id, 4) as id,
        i.refereed.classname as refereed
    from ${openaire_db_name}.software res
    lateral view explode(res.instance) exploded as i
    where res.datainfo.deletedbyinference = false
      and res.datainfo.invisible = FALSE
      and i.refereed.classname = 'peerReviewed'
),
not_reviewed as (
    -- ids having at least one instance explicitly marked as not peer reviewed
    select distinct
        substr(res.id, 4) as id,
        i.refereed.classname as refereed
    from ${openaire_db_name}.software res
    lateral view explode(res.instance) exploded as i
    where res.datainfo.deletedbyinference = false
      and res.datainfo.invisible = FALSE
      and i.refereed.classname = 'nonPeerReviewed'
)
select reviewed.id, reviewed.refereed
from reviewed
union all
-- anti-join: keep a non-peer-reviewed id only if it never appears as peer reviewed
select not_reviewed.id, not_reviewed.refereed
from not_reviewed
left join reviewed on reviewed.id = not_reviewed.id
where reviewed.id is null;
-- Rebuild otherresearchproduct_refereed with at most one row per id.
-- An id with any instance classified 'peerReviewed' is reported as
-- 'peerReviewed'; 'nonPeerReviewed' is reported only when no instance of the
-- same id is peer reviewed. Instances with any other classification produce
-- no row. (The previous version of this statement emitted one row per
-- instance, without this precedence rule.)
DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_refereed purge;

CREATE TABLE IF NOT EXISTS ${stats_db_name}.otherresearchproduct_refereed STORED AS PARQUET as
with reviewed as (
    -- ids having at least one peer-reviewed instance
    select distinct
        substr(res.id, 4) as id,
        i.refereed.classname as refereed
    from ${openaire_db_name}.otherresearchproduct res
    lateral view explode(res.instance) exploded as i
    where res.datainfo.deletedbyinference = false
      and res.datainfo.invisible = FALSE
      and i.refereed.classname = 'peerReviewed'
),
not_reviewed as (
    -- ids having at least one instance explicitly marked as not peer reviewed
    select distinct
        substr(res.id, 4) as id,
        i.refereed.classname as refereed
    from ${openaire_db_name}.otherresearchproduct res
    lateral view explode(res.instance) exploded as i
    where res.datainfo.deletedbyinference = false
      and res.datainfo.invisible = FALSE
      and i.refereed.classname = 'nonPeerReviewed'
)
select reviewed.id, reviewed.refereed
from reviewed
union all
-- anti-join: keep a non-peer-reviewed id only if it never appears as peer reviewed
select not_reviewed.id, not_reviewed.refereed
from not_reviewed
left join reviewed on reviewed.id = not_reviewed.id
where reviewed.id is null;
CREATE VIEW IF NOT EXISTS ${stats_db_name}.result_refereed as CREATE VIEW IF NOT EXISTS ${stats_db_name}.result_refereed as
select * from ${stats_db_name}.publication_refereed select * from ${stats_db_name}.publication_refereed

View File

@ -1195,6 +1195,21 @@ and pf.publicly_funded='yes') foo)
select distinct p.id, coalesce(publicly_funded, 0) as publicly_funded select distinct p.id, coalesce(publicly_funded, 0) as publicly_funded
from ${stats_db_name}.publication p from ${stats_db_name}.publication p
left outer join ( left outer join (
select distinct ro.id, 1 as publicly_funded from result_organization ro select distinct ro.id, 1 as publicly_funded from ${stats_db_name}.result_organization ro
join ${stats_db_name}.organization o on o.id=ro.organization join ${stats_db_name}.organization o on o.id=ro.organization
join publicly_funded_orgs pfo on o.name=pfo.name) tmp on p.id=tmp.id; join publicly_funded_orgs pfo on o.name=pfo.name) tmp on p.id=tmp.id;
-- Per-publication flag: 1 when the publication has at least one instance that
-- carries a license and is hosted by a datasource whose type contains
-- 'Repository' and whose name is not 'Other'; 0 otherwise.
create table ${stats_db_name}.indi_pub_green_with_license stored as parquet as
with licensed as (
    -- publications with a licensed instance on a repository-type datasource
    select distinct
        pub.id,
        1 as green_with_license
    from ${stats_db_name}.publication pub
    join ${stats_db_name}.result_instance ri
        on ri.id = pub.id
    join ${stats_db_name}.datasource
        on datasource.id = ri.hostedby
    where ri.license is not null
      and datasource.type like '%Repository%'
      and datasource.name != 'Other'
)
select distinct
    p.id,
    -- publications without a match in `licensed` get the default 0
    coalesce(licensed.green_with_license, 0) as green_with_license
from ${stats_db_name}.publication p
left outer join licensed
    on p.id = licensed.id;
-- Distinct (result id, country) pairs derived from the organizations linked
-- to each result. The join on funder keeps only organization countries that
-- equal the country of at least one funder; the DISTINCT collapses the
-- duplicates that join produces when several funders share a country.
create table ${stats_db_name}.result_country stored as parquet as
select distinct
    res_org.id,
    org.country
from ${stats_db_name}.result_organization res_org
join ${stats_db_name}.organization org
    on org.id = res_org.organization
join ${stats_db_name}.funder fnd
    on fnd.country = org.country;

View File

@ -180,4 +180,6 @@ create view TARGET.indi_funder_openess as select * from SOURCE.indi_funder_opene
-- Pass-through views: expose the SOURCE indicator tables unchanged in TARGET.
create view TARGET.indi_funder_findable as
select * from SOURCE.indi_funder_findable;

create view TARGET.indi_ris_fairness as
select * from SOURCE.indi_ris_fairness;

create view TARGET.indi_ris_openess as
select * from SOURCE.indi_ris_openess;

create view TARGET.indi_ris_findable as
select * from SOURCE.indi_ris_findable;

-- Materialized copies, restricted to rows whose id is present in TARGET.result.
create table TARGET.indi_pub_green_with_license stored as parquet as
select *
from SOURCE.indi_pub_green_with_license src
where exists (select 1 from TARGET.result res where res.id = src.id);

create table TARGET.result_country stored as parquet as
select *
from SOURCE.result_country src
where exists (select 1 from TARGET.result res where res.id = src.id);

View File

@ -80,8 +80,8 @@ create table TARGET.result stored as parquet as
'openorgs____::0fccc7640f0cb44d5cd1b06b312a06b9', -- Cardiff University 'openorgs____::0fccc7640f0cb44d5cd1b06b312a06b9', -- Cardiff University
'openorgs____::8839b55dae0c84d56fd533f52d5d483a', -- Leibniz Institute of Ecological Urban and Regional Development 'openorgs____::8839b55dae0c84d56fd533f52d5d483a', -- Leibniz Institute of Ecological Urban and Regional Development
'openorgs____::526468206bca24c1c90da6a312295cf4', -- Cyprus University of Technology 'openorgs____::526468206bca24c1c90da6a312295cf4', -- Cyprus University of Technology
'openorgs____::b5ca9d4340e26454e367e2908ef3872f' -- Alma Mater Studiorum University of Bologna 'openorgs____::b5ca9d4340e26454e367e2908ef3872f', -- Alma Mater Studiorum University of Bologna
'openorgs____::a6340e6ecf60f6bba163659df985b0f2' -- TU Dresden
) )) foo; ) )) foo;
create view if not exists TARGET.category as select * from SOURCE.category; create view if not exists TARGET.category as select * from SOURCE.category;
@ -263,5 +263,6 @@ create view TARGET.indi_funder_findable as select * from SOURCE.indi_funder_find
-- Pass-through views: expose the SOURCE indicator tables unchanged in TARGET.
create view TARGET.indi_ris_fairness as
select * from SOURCE.indi_ris_fairness;

create view TARGET.indi_ris_openess as
select * from SOURCE.indi_ris_openess;

create view TARGET.indi_ris_findable as
select * from SOURCE.indi_ris_findable;

-- Materialized copies, restricted to rows whose id is present in TARGET.result.
create table TARGET.indi_pub_green_with_license stored as parquet as
select *
from SOURCE.indi_pub_green_with_license src
where exists (select 1 from TARGET.result res where res.id = src.id);

create table TARGET.result_country stored as parquet as
select *
from SOURCE.result_country src
where exists (select 1 from TARGET.result res where res.id = src.id);

View File

@ -60,5 +60,6 @@ create table TARGET.result stored as parquet as
'openorgs____::0fccc7640f0cb44d5cd1b06b312a06b9', -- Cardiff University 'openorgs____::0fccc7640f0cb44d5cd1b06b312a06b9', -- Cardiff University
'openorgs____::8839b55dae0c84d56fd533f52d5d483a', -- Leibniz Institute of Ecological Urban and Regional Development 'openorgs____::8839b55dae0c84d56fd533f52d5d483a', -- Leibniz Institute of Ecological Urban and Regional Development
'openorgs____::526468206bca24c1c90da6a312295cf4', -- Cyprus University of Technology 'openorgs____::526468206bca24c1c90da6a312295cf4', -- Cyprus University of Technology
'openorgs____::b5ca9d4340e26454e367e2908ef3872f' -- Alma Mater Studiorum University of Bologna 'openorgs____::b5ca9d4340e26454e367e2908ef3872f', -- Alma Mater Studiorum University of Bologna
'openorgs____::a6340e6ecf60f6bba163659df985b0f2' -- TU Dresden
))) foo; ))) foo;

View File

@ -95,7 +95,8 @@ DROP TABLE IF EXISTS ${stats_db_name}.funder purge;
-- One row per distinct funder found in the funding trees of the projects.
-- Each exploded funding-tree value (`fund`) is queried with XPath for the
-- funder id, name, short name and jurisdiction (exposed as `country`).
-- All four attributes are read from the same exploded element so that every
-- column of a row describes the same funder. Reading the jurisdiction from
-- `fundingtree[0].value` instead would always use the first funding tree of
-- the project, which can belong to a different funder when a project has
-- more than one funding tree.
create table ${stats_db_name}.funder STORED AS PARQUET as
select distinct
    xpath_string(fund, '//funder/id') as id,
    xpath_string(fund, '//funder/name') as name,
    xpath_string(fund, '//funder/shortname') as shortname,
    xpath_string(fund, '//funder/jurisdiction') as country
from ${openaire_db_name}.project p
lateral view explode(p.fundingtree.value) fundingtree as fund;
DROP TABLE IF EXISTS ${stats_db_name}.project_organization_contribution purge; DROP TABLE IF EXISTS ${stats_db_name}.project_organization_contribution purge;