forked from D-Net/dnet-hadoop
Merge pull request '[stats wf] Changes to indicators tables' (#244) from antonis.lempesis/dnet-hadoop:beta into beta
Reviewed-on: D-Net/dnet-hadoop#244
This commit is contained in:
commit
465e941214
|
@ -76,11 +76,11 @@ compute stats indi_result_with_orcid;
|
|||
|
||||
---- Sprint 3 ----
|
||||
-- indi_funded_result_with_fundref: one row per distinct result found in project_results,
-- with fundref = 1 when that result also has a project_results row whose provenance is
-- 'Harvested', and 0 otherwise (coalesce over the unmatched side of the left outer join).
-- NOTE(review): this span is a rendered diff, so the select, subquery and ON lines each
-- appear twice: once keyed on `id` and once keyed on `result` (exposed as `id`).
-- Presumably the `result` variants are the post-change lines -- confirm against the commit.
create table indi_funded_result_with_fundref stored as parquet as
|
||||
select distinct r.id, coalesce(fundref, 0) as fundref
|
||||
select distinct r.result as id, coalesce(fundref, 0) as fundref
|
||||
from project_results r
|
||||
left outer join (select distinct id, 1 as fundref from project_results
|
||||
left outer join (select distinct result, 1 as fundref from project_results
|
||||
where provenance='Harvested') tmp
|
||||
on r.id= tmp.id;
|
||||
on r.result= tmp.result;
|
||||
|
||||
compute stats indi_funded_result_with_fundref;
|
||||
|
||||
|
@ -179,17 +179,17 @@ from publication_datasources pd
|
|||
|
||||
compute stats indi_pub_diamond;
|
||||
|
||||
-- indi_pub_hybrid: one row per distinct publication id in publication_datasources, with
-- is_hybrid = 1 when the publication's datasource matches a stats_ext.plan_s_jn row on both
-- print and online ISSN and that row has journal_is_in_doaj = false and journal_is_oa = false;
-- otherwise 0 (coalesce over the unmatched side of the left outer join).
-- The join to stats_ext.plan_s_jn carries no ON clause: the ISSN matching is done entirely
-- in the WHERE predicate of the subquery.
-- NOTE(review): the same statement appears again directly below, fully commented out;
-- in this rendered diff that presumably means the commit disables this table -- confirm.
create table indi_pub_hybrid stored as parquet as
|
||||
select distinct pd.id, coalesce(is_hybrid, 0) as is_hybrid
|
||||
from publication_datasources pd
|
||||
left outer join (
|
||||
select pd.id, 1 as is_hybrid from publication_datasources pd
|
||||
join datasource d on d.id=pd.datasource
|
||||
join stats_ext.plan_s_jn ps where (ps.issn_print=d.issn_printed and ps.issn_online=d.issn_online)
|
||||
and (ps.journal_is_in_doaj=false and ps.journal_is_oa=false)) tmp
|
||||
on pd.id=tmp.id;
|
||||
|
||||
|
||||
compute stats indi_pub_hybrid;
|
||||
--create table indi_pub_hybrid stored as parquet as
|
||||
--select distinct pd.id, coalesce(is_hybrid, 0) as is_hybrid
|
||||
--from publication_datasources pd
|
||||
-- left outer join (
|
||||
-- select pd.id, 1 as is_hybrid from publication_datasources pd
|
||||
-- join datasource d on d.id=pd.datasource
|
||||
-- join stats_ext.plan_s_jn ps where (ps.issn_print=d.issn_printed and ps.issn_online=d.issn_online)
|
||||
-- and (ps.journal_is_in_doaj=false and ps.journal_is_oa=false)) tmp
|
||||
-- on pd.id=tmp.id;
|
||||
--
|
||||
--compute stats indi_pub_hybrid;
|
||||
|
||||
create table indi_pub_in_transformative stored as parquet as
|
||||
select distinct pd.id, coalesce(is_transformative, 0) as is_transformative
|
||||
|
@ -564,12 +564,12 @@ create table indi_org_fairness stored as parquet as
|
|||
(select ro.organization organization, count(distinct ro.id) no_result_fair from result_organization ro
|
||||
join result r on r.id=ro.id
|
||||
--join result_pids rp on r.id=rp.id
|
||||
where (title is not null) and (publisher is not null) and (abstract is true) and (year is not null) and (authors>0) and year>2003
|
||||
where (title is not null) and (publisher is not null) and (abstract is true) and (year is not null) and (authors>0) and cast(year as int)>2003
|
||||
group by ro.organization),
|
||||
--return all results group by organization
|
||||
allresults as (select organization, count(distinct ro.id) no_allresults from result_organization ro
|
||||
join result r on r.id=ro.id
|
||||
where year>2003
|
||||
where cast(year as int)>2003
|
||||
group by organization)
|
||||
--return results_fair/all_results
|
||||
select allresults.organization, result_fair.no_result_fair/allresults.no_allresults org_fairness
|
||||
|
@ -638,11 +638,11 @@ create table indi_org_fairness_year stored as parquet as
|
|||
(select year, ro.organization organization, count(distinct ro.id) no_result_fair from result_organization ro
|
||||
join result r on r.id=ro.id
|
||||
join result_pids rp on r.id=rp.id
|
||||
where (title is not null) and (publisher is not null) and (abstract is true) and (year is not null) and (authors>0) and year>2003
|
||||
where (title is not null) and (publisher is not null) and (abstract is true) and (year is not null) and (authors>0) and cast(year as int)>2003
|
||||
group by ro.organization, year),
|
||||
allresults as (select year, organization, count(distinct ro.id) no_allresults from result_organization ro
|
||||
join result r on r.id=ro.id
|
||||
where year>2003
|
||||
where cast(year as int)>2003
|
||||
group by organization, year)
|
||||
--return results_fair/all_results
|
||||
select allresults.year, allresults.organization, result_fair.no_result_fair/allresults.no_allresults org_fairness
|
||||
|
@ -657,12 +657,12 @@ create table indi_org_findable_year stored as parquet as
|
|||
(select year, ro.organization organization, count(distinct rp.id) no_result_with_pid from result_organization ro
|
||||
join result_pids rp on rp.id=ro.id
|
||||
join result r on r.id=rp.id
|
||||
where year >2003
|
||||
where cast(year as int) >2003
|
||||
group by ro.organization, year),
|
||||
--return all results group by organization,year
|
||||
allresults as (select year, organization, count(distinct ro.id) no_allresults from result_organization ro
|
||||
join result r on r.id=ro.id
|
||||
where year >2003
|
||||
where cast(year as int) >2003
|
||||
group by organization, year)
|
||||
--return results_with_pid/all_results
|
||||
select allresults.year, allresults.organization, result_with_pid.no_result_with_pid/allresults.no_allresults org_findable
|
||||
|
@ -677,12 +677,12 @@ create table indi_org_findable stored as parquet as
|
|||
(select ro.organization organization, count(distinct rp.id) no_result_with_pid from result_organization ro
|
||||
join result_pids rp on rp.id=ro.id
|
||||
join result r on r.id=rp.id
|
||||
where year >2003
|
||||
where cast(year as int) >2003
|
||||
group by ro.organization),
|
||||
--return all results group by organization
|
||||
allresults as (select organization, count(distinct ro.id) no_allresults from result_organization ro
|
||||
join result r on r.id=ro.id
|
||||
where year >2003
|
||||
where cast(year as int) >2003
|
||||
group by organization)
|
||||
--return results_with_pid/all_results
|
||||
select allresults.organization, result_with_pid.no_result_with_pid/allresults.no_allresults org_findable
|
||||
|
|
|
@ -3,20 +3,20 @@
|
|||
----------------------------------------------------
|
||||
|
||||
-- Peer reviewed:
|
||||
-- result_peerreviewed: one row per row of ${stats_db_name}.result (more if an indicator
-- table holds several rows for the same id). peer_reviewed is true only when the result has
-- doi_from_crossref = 1 AND grey_lit = 0; a result missing from either indicator table gets
-- false, because the unmatched side of the left outer join is NULL and the CASE falls to ELSE.
-- NOTE(review): this span is a rendered diff, so the `create table` header appears twice,
-- without and with IF NOT EXISTS; presumably the IF NOT EXISTS form is the post-change
-- line -- confirm against the commit.
create table ${stats_db_name}.result_peerreviewed STORED AS PARQUET as
|
||||
create table IF NOT EXISTS ${stats_db_name}.result_peerreviewed STORED AS PARQUET as
|
||||
select r.id as id, case when doi.doi_from_crossref=1 and grey.grey_lit=0 then true else false end as peer_reviewed
|
||||
from ${stats_db_name}.result r
|
||||
left outer join ${stats_db_name}.indi_pub_doi_from_crossref doi on doi.id=r.id
|
||||
left outer join ${stats_db_name}.indi_pub_grey_lit grey on grey.id=r.id;
|
||||
|
||||
-- Green OA:
|
||||
-- result_greenoa: for each row of ${stats_db_name}.result, green is true when the joined
-- indi_pub_green_oa row has green_oa = 1, and false otherwise (including results with no
-- matching indicator row, where the left outer join yields NULL).
-- NOTE(review): rendered diff -- the `create table` header appears without and with
-- IF NOT EXISTS; presumably the latter is the post-change line -- confirm.
create table ${stats_db_name}.result_greenoa STORED AS PARQUET as
|
||||
create table IF NOT EXISTS ${stats_db_name}.result_greenoa STORED AS PARQUET as
|
||||
select r.id, case when green.green_oa=1 then true else false end as green
|
||||
from ${stats_db_name}.result r
|
||||
left outer join ${stats_db_name}.indi_pub_green_oa green on green.id=r.id;
|
||||
|
||||
-- GOLD OA:
|
||||
-- result_gold: for each row of ${stats_db_name}.result, gold is true when the joined
-- indi_pub_gold_oa row has is_gold = 1, and false otherwise (including results with no
-- matching indicator row, where the left outer join yields NULL).
-- NOTE(review): rendered diff -- the `create table` header appears without and with
-- IF NOT EXISTS; presumably the latter is the post-change line -- confirm.
create table ${stats_db_name}.result_gold STORED AS PARQUET as
|
||||
create table IF NOT EXISTS ${stats_db_name}.result_gold STORED AS PARQUET as
|
||||
select r.id, case when gold.is_gold=1 then true else false end as gold
|
||||
from ${stats_db_name}.result r
|
||||
left outer join ${stats_db_name}.indi_pub_gold_oa gold on gold.id=r.id;
|
|
@ -45,7 +45,10 @@ create table TARGET.result stored as parquet as
|
|||
'openorgs____::15e7921fc50d9aa1229a82a84429419e', -- University Of Thessaly
|
||||
'openorgs____::11f7919dadc8f8a7251af54bba60c956', -- Technical University of Crete
|
||||
'openorgs____::84f0c5f5dbb6daf42748485924efde4b', -- University of Piraeus
|
||||
'openorgs____::4ac562f0376fce3539504567649cb373' -- University of Patras
|
||||
'openorgs____::4ac562f0376fce3539504567649cb373', -- University of Patras
|
||||
'openorgs____::3e8d1f8c3f6cd7f418b09f1f58b4873b', -- Aristotle University of Thessaloniki
|
||||
'openorgs____::3fcef6e1c469c10f2a84b281372c9814', -- World Bank
|
||||
'openorgs____::1698a2eb1885ef8adb5a4a969e745ad3' -- École des Ponts ParisTech
|
||||
) )) foo;
|
||||
compute stats TARGET.result;
|
||||
|
||||
|
@ -159,10 +162,10 @@ create table TARGET.indi_pub_doi_from_crossref stored as parquet as select * fro
|
|||
compute stats TARGET.indi_pub_doi_from_crossref;
|
||||
-- Each `create table TARGET.<name>` below copies the same-named SOURCE indicator table into
-- TARGET, keeping only the rows whose id exists in TARGET.result (EXISTS subquery); where a
-- `compute stats` line follows, it targets the table just created.
-- NOTE(review): rendered diff -- the indi_datasets_gold_oa and indi_software_gold_oa
-- statements appear both active and commented out; presumably the commented-out form is
-- the post-change state -- confirm against the commit.
create table TARGET.indi_pub_gold_oa stored as parquet as select * from SOURCE.indi_pub_gold_oa orig where exists (select 1 from TARGET.result r where r.id=orig.id);
|
||||
compute stats TARGET.indi_pub_gold_oa;
|
||||
create table TARGET.indi_datasets_gold_oa stored as parquet as select * from SOURCE.indi_datasets_gold_oa orig where exists (select 1 from TARGET.result r where r.id=orig.id);
|
||||
compute stats TARGET.indi_datasets_gold_oa;
|
||||
create table TARGET.indi_software_gold_oa stored as parquet as select * from SOURCE.indi_software_gold_oa orig where exists (select 1 from TARGET.result r where r.id=orig.id);
|
||||
compute stats TARGET.indi_software_gold_oa;
|
||||
--create table TARGET.indi_datasets_gold_oa stored as parquet as select * from SOURCE.indi_datasets_gold_oa orig where exists (select 1 from TARGET.result r where r.id=orig.id);
|
||||
--compute stats TARGET.indi_datasets_gold_oa;
|
||||
--create table TARGET.indi_software_gold_oa stored as parquet as select * from SOURCE.indi_software_gold_oa orig where exists (select 1 from TARGET.result r where r.id=orig.id);
|
||||
--compute stats TARGET.indi_software_gold_oa;
|
||||
create table TARGET.indi_pub_has_abstract stored as parquet as select * from SOURCE.indi_pub_has_abstract orig where exists (select 1 from TARGET.result r where r.id=orig.id);
|
||||
compute stats TARGET.indi_pub_has_abstract;
|
||||
create table TARGET.indi_result_has_cc_licence stored as parquet as select * from SOURCE.indi_result_has_cc_licence orig where exists (select 1 from TARGET.result r where r.id=orig.id);
|
||||
|
|
Loading…
Reference in New Issue