Merge pull request '[stats wf] Changes to indicators tables' (#244) from antonis.lempesis/dnet-hadoop:beta into beta

Reviewed-on: D-Net/dnet-hadoop#244
This commit is contained in:
Claudio Atzori 2022-09-16 10:13:58 +02:00
commit 465e941214
3 changed files with 33 additions and 30 deletions

View File

@ -76,11 +76,11 @@ compute stats indi_result_with_orcid;
---- Sprint 3 ----
-- indi_funded_result_with_fundref: one row per distinct project_results.result
-- (exposed as id). fundref is 1 when that result has at least one project_results
-- row with provenance = 'Harvested'; otherwise the left join yields NULL and
-- coalesce turns it into 0.
create table indi_funded_result_with_fundref stored as parquet as
with harvested as (
    -- results that have at least one link with provenance 'Harvested'
    select distinct result, 1 as fundref
    from project_results
    where provenance = 'Harvested'
)
select distinct pr.result as id, coalesce(h.fundref, 0) as fundref
from project_results pr
left outer join harvested h on pr.result = h.result;

compute stats indi_funded_result_with_fundref;
@ -179,17 +179,17 @@ from publication_datasources pd
compute stats indi_pub_diamond; compute stats indi_pub_diamond;
-- indi_pub_hybrid is disabled in this revision: the statement is kept below only as
-- commented-out reference, so no indi_pub_hybrid table is created by this script.
-- NOTE(review): the reason for disabling is not visible here; confirm before re-enabling.
-- NOTE(review): the join to stats_ext.plan_s_jn has no ON clause; its matching
-- conditions are expressed in the WHERE that follows it.
--create table indi_pub_hybrid stored as parquet as
--select distinct pd.id, coalesce(is_hybrid, 0) as is_hybrid
--from publication_datasources pd
--  left outer join (
--      select pd.id, 1 as is_hybrid from publication_datasources pd
--      join datasource d on d.id=pd.datasource
--      join stats_ext.plan_s_jn ps where (ps.issn_print=d.issn_printed and ps.issn_online=d.issn_online)
--      and (ps.journal_is_in_doaj=false and ps.journal_is_oa=false)) tmp
--  on pd.id=tmp.id;
--
--compute stats indi_pub_hybrid;
create table indi_pub_in_transformative stored as parquet as create table indi_pub_in_transformative stored as parquet as
select distinct pd.id, coalesce(is_transformative, 0) as is_transformative select distinct pd.id, coalesce(is_transformative, 0) as is_transformative
@ -564,12 +564,12 @@ create table indi_org_fairness stored as parquet as
(select ro.organization organization, count(distinct ro.id) no_result_fair from result_organization ro (select ro.organization organization, count(distinct ro.id) no_result_fair from result_organization ro
join result r on r.id=ro.id join result r on r.id=ro.id
--join result_pids rp on r.id=rp.id --join result_pids rp on r.id=rp.id
where (title is not null) and (publisher is not null) and (abstract is true) and (year is not null) and (authors>0) and year>2003 where (title is not null) and (publisher is not null) and (abstract is true) and (year is not null) and (authors>0) and cast(year as int)>2003
group by ro.organization), group by ro.organization),
--return all results group by organization --return all results group by organization
allresults as (select organization, count(distinct ro.id) no_allresults from result_organization ro allresults as (select organization, count(distinct ro.id) no_allresults from result_organization ro
join result r on r.id=ro.id join result r on r.id=ro.id
where year>2003 where cast(year as int)>2003
group by organization) group by organization)
--return results_fair/all_results --return results_fair/all_results
select allresults.organization, result_fair.no_result_fair/allresults.no_allresults org_fairness select allresults.organization, result_fair.no_result_fair/allresults.no_allresults org_fairness
@ -638,11 +638,11 @@ create table indi_org_fairness_year stored as parquet as
(select year, ro.organization organization, count(distinct ro.id) no_result_fair from result_organization ro (select year, ro.organization organization, count(distinct ro.id) no_result_fair from result_organization ro
join result r on r.id=ro.id join result r on r.id=ro.id
join result_pids rp on r.id=rp.id join result_pids rp on r.id=rp.id
where (title is not null) and (publisher is not null) and (abstract is true) and (year is not null) and (authors>0) and year>2003 where (title is not null) and (publisher is not null) and (abstract is true) and (year is not null) and (authors>0) and cast(year as int)>2003
group by ro.organization, year), group by ro.organization, year),
allresults as (select year, organization, count(distinct ro.id) no_allresults from result_organization ro allresults as (select year, organization, count(distinct ro.id) no_allresults from result_organization ro
join result r on r.id=ro.id join result r on r.id=ro.id
where year>2003 where cast(year as int)>2003
group by organization, year) group by organization, year)
--return results_fair/all_results --return results_fair/all_results
select allresults.year, allresults.organization, result_fair.no_result_fair/allresults.no_allresults org_fairness select allresults.year, allresults.organization, result_fair.no_result_fair/allresults.no_allresults org_fairness
@ -657,12 +657,12 @@ create table indi_org_findable_year stored as parquet as
(select year, ro.organization organization, count(distinct rp.id) no_result_with_pid from result_organization ro (select year, ro.organization organization, count(distinct rp.id) no_result_with_pid from result_organization ro
join result_pids rp on rp.id=ro.id join result_pids rp on rp.id=ro.id
join result r on r.id=rp.id join result r on r.id=rp.id
where year >2003 where cast(year as int) >2003
group by ro.organization, year), group by ro.organization, year),
--return all results group by organization,year --return all results group by organization,year
allresults as (select year, organization, count(distinct ro.id) no_allresults from result_organization ro allresults as (select year, organization, count(distinct ro.id) no_allresults from result_organization ro
join result r on r.id=ro.id join result r on r.id=ro.id
where year >2003 where cast(year as int) >2003
group by organization, year) group by organization, year)
--return results_with_pid/all_results --return results_with_pid/all_results
select allresults.year, allresults.organization, result_with_pid.no_result_with_pid/allresults.no_allresults org_findable select allresults.year, allresults.organization, result_with_pid.no_result_with_pid/allresults.no_allresults org_findable
@ -677,12 +677,12 @@ create table indi_org_findable stored as parquet as
(select ro.organization organization, count(distinct rp.id) no_result_with_pid from result_organization ro (select ro.organization organization, count(distinct rp.id) no_result_with_pid from result_organization ro
join result_pids rp on rp.id=ro.id join result_pids rp on rp.id=ro.id
join result r on r.id=rp.id join result r on r.id=rp.id
where year >2003 where cast(year as int) >2003
group by ro.organization), group by ro.organization),
--return all results group by organization --return all results group by organization
allresults as (select organization, count(distinct ro.id) no_allresults from result_organization ro allresults as (select organization, count(distinct ro.id) no_allresults from result_organization ro
join result r on r.id=ro.id join result r on r.id=ro.id
where year >2003 where cast(year as int) >2003
group by organization) group by organization)
--return results_with_pid/all_results --return results_with_pid/all_results
select allresults.organization, result_with_pid.no_result_with_pid/allresults.no_allresults org_findable select allresults.organization, result_with_pid.no_result_with_pid/allresults.no_allresults org_findable

View File

@ -3,20 +3,20 @@
---------------------------------------------------- ----------------------------------------------------
-- Peer reviewed:
-- Keeps every row of result (both joins are left outer joins). peer_reviewed is
-- true only when the matching indi_pub_doi_from_crossref row has
-- doi_from_crossref = 1 and the matching indi_pub_grey_lit row has grey_lit = 0.
-- A result missing from either indicator table falls into the else branch and
-- gets false.
-- IF NOT EXISTS: nothing is created when the table is already present.
create table IF NOT EXISTS ${stats_db_name}.result_peerreviewed STORED AS PARQUET as
select
    res.id as id,
    case
        when crossref.doi_from_crossref = 1 and greylit.grey_lit = 0 then true
        else false
    end as peer_reviewed
from ${stats_db_name}.result res
left outer join ${stats_db_name}.indi_pub_doi_from_crossref crossref on crossref.id = res.id
left outer join ${stats_db_name}.indi_pub_grey_lit greylit on greylit.id = res.id;
-- Green OA:
-- Keeps every row of result. green is true when the matching indi_pub_green_oa
-- row has green_oa = 1, and false otherwise (including when there is no match).
-- IF NOT EXISTS: nothing is created when the table is already present.
create table IF NOT EXISTS ${stats_db_name}.result_greenoa STORED AS PARQUET as
select
    res.id,
    case when oa.green_oa = 1 then true else false end as green
from ${stats_db_name}.result res
left outer join ${stats_db_name}.indi_pub_green_oa oa on oa.id = res.id;
-- GOLD OA:
-- Keeps every row of result. gold is true when the matching indi_pub_gold_oa
-- row has is_gold = 1, and false otherwise (including when there is no match).
-- IF NOT EXISTS: nothing is created when the table is already present.
create table IF NOT EXISTS ${stats_db_name}.result_gold STORED AS PARQUET as
select
    res.id,
    case when oa.is_gold = 1 then true else false end as gold
from ${stats_db_name}.result res
left outer join ${stats_db_name}.indi_pub_gold_oa oa on oa.id = res.id;

View File

@ -45,7 +45,10 @@ create table TARGET.result stored as parquet as
'openorgs____::15e7921fc50d9aa1229a82a84429419e', -- University Of Thessaly 'openorgs____::15e7921fc50d9aa1229a82a84429419e', -- University Of Thessaly
'openorgs____::11f7919dadc8f8a7251af54bba60c956', -- Technical University of Crete 'openorgs____::11f7919dadc8f8a7251af54bba60c956', -- Technical University of Crete
'openorgs____::84f0c5f5dbb6daf42748485924efde4b', -- University of Piraeus 'openorgs____::84f0c5f5dbb6daf42748485924efde4b', -- University of Piraeus
'openorgs____::4ac562f0376fce3539504567649cb373' -- University of Patras 'openorgs____::4ac562f0376fce3539504567649cb373', -- University of Patras
'openorgs____::3e8d1f8c3f6cd7f418b09f1f58b4873b', -- Aristotle University of Thessaloniki
'openorgs____::3fcef6e1c469c10f2a84b281372c9814', -- World Bank
'openorgs____::1698a2eb1885ef8adb5a4a969e745ad3' -- École des Ponts ParisTech
) )) foo; ) )) foo;
compute stats TARGET.result; compute stats TARGET.result;
@ -159,10 +162,10 @@ create table TARGET.indi_pub_doi_from_crossref stored as parquet as select * fro
compute stats TARGET.indi_pub_doi_from_crossref; compute stats TARGET.indi_pub_doi_from_crossref;
-- Copy of SOURCE.indi_pub_gold_oa limited to rows whose id exists in TARGET.result.
-- select * is deliberate here: the table is copied with all of its columns.
create table TARGET.indi_pub_gold_oa stored as parquet as
select *
from SOURCE.indi_pub_gold_oa orig
where exists (select 1 from TARGET.result r where r.id = orig.id);
compute stats TARGET.indi_pub_gold_oa;
-- The dataset and software gold-OA copies are disabled in this revision; the
-- statements are kept below only as commented-out reference.
-- NOTE(review): presumably the SOURCE tables are no longer produced; confirm before re-enabling.
--create table TARGET.indi_datasets_gold_oa stored as parquet as select * from SOURCE.indi_datasets_gold_oa orig where exists (select 1 from TARGET.result r where r.id=orig.id);
--compute stats TARGET.indi_datasets_gold_oa;
--create table TARGET.indi_software_gold_oa stored as parquet as select * from SOURCE.indi_software_gold_oa orig where exists (select 1 from TARGET.result r where r.id=orig.id);
--compute stats TARGET.indi_software_gold_oa;
-- Copy of SOURCE.indi_pub_has_abstract limited to rows whose id exists in TARGET.result.
-- select * is deliberate here: the table is copied with all of its columns.
create table TARGET.indi_pub_has_abstract stored as parquet as
select *
from SOURCE.indi_pub_has_abstract orig
where exists (select 1 from TARGET.result r where r.id = orig.id);
compute stats TARGET.indi_pub_has_abstract;
-- Copy of SOURCE.indi_result_has_cc_licence limited to rows whose id exists in TARGET.result.
-- select * is deliberate here: the table is copied with all of its columns.
create table TARGET.indi_result_has_cc_licence stored as parquet as
select *
from SOURCE.indi_result_has_cc_licence orig
where exists (select 1 from TARGET.result r where r.id = orig.id);