forked from D-Net/dnet-hadoop
Merge pull request '[stats wf] Changes to indicators tables' (#244) from antonis.lempesis/dnet-hadoop:beta into beta
Reviewed-on: D-Net/dnet-hadoop#244
This commit is contained in:
commit
465e941214
|
@ -76,11 +76,11 @@ compute stats indi_result_with_orcid;
|
||||||
|
|
||||||
---- Sprint 3 ----
|
---- Sprint 3 ----
|
||||||
create table indi_funded_result_with_fundref stored as parquet as
|
create table indi_funded_result_with_fundref stored as parquet as
|
||||||
select distinct r.id, coalesce(fundref, 0) as fundref
|
select distinct r.result as id, coalesce(fundref, 0) as fundref
|
||||||
from project_results r
|
from project_results r
|
||||||
left outer join (select distinct id, 1 as fundref from project_results
|
left outer join (select distinct result, 1 as fundref from project_results
|
||||||
where provenance='Harvested') tmp
|
where provenance='Harvested') tmp
|
||||||
on r.id= tmp.id;
|
on r.result= tmp.result;
|
||||||
|
|
||||||
compute stats indi_funded_result_with_fundref;
|
compute stats indi_funded_result_with_fundref;
|
||||||
|
|
||||||
|
@ -179,17 +179,17 @@ from publication_datasources pd
|
||||||
|
|
||||||
compute stats indi_pub_diamond;
|
compute stats indi_pub_diamond;
|
||||||
|
|
||||||
create table indi_pub_hybrid stored as parquet as
|
--create table indi_pub_hybrid stored as parquet as
|
||||||
select distinct pd.id, coalesce(is_hybrid, 0) as is_hybrid
|
--select distinct pd.id, coalesce(is_hybrid, 0) as is_hybrid
|
||||||
from publication_datasources pd
|
--from publication_datasources pd
|
||||||
left outer join (
|
-- left outer join (
|
||||||
select pd.id, 1 as is_hybrid from publication_datasources pd
|
-- select pd.id, 1 as is_hybrid from publication_datasources pd
|
||||||
join datasource d on d.id=pd.datasource
|
-- join datasource d on d.id=pd.datasource
|
||||||
join stats_ext.plan_s_jn ps where (ps.issn_print=d.issn_printed and ps.issn_online=d.issn_online)
|
-- join stats_ext.plan_s_jn ps where (ps.issn_print=d.issn_printed and ps.issn_online=d.issn_online)
|
||||||
and (ps.journal_is_in_doaj=false and ps.journal_is_oa=false)) tmp
|
-- and (ps.journal_is_in_doaj=false and ps.journal_is_oa=false)) tmp
|
||||||
on pd.id=tmp.id;
|
-- on pd.id=tmp.id;
|
||||||
|
--
|
||||||
compute stats indi_pub_hybrid;
|
--compute stats indi_pub_hybrid;
|
||||||
|
|
||||||
create table indi_pub_in_transformative stored as parquet as
|
create table indi_pub_in_transformative stored as parquet as
|
||||||
select distinct pd.id, coalesce(is_transformative, 0) as is_transformative
|
select distinct pd.id, coalesce(is_transformative, 0) as is_transformative
|
||||||
|
@ -564,12 +564,12 @@ create table indi_org_fairness stored as parquet as
|
||||||
(select ro.organization organization, count(distinct ro.id) no_result_fair from result_organization ro
|
(select ro.organization organization, count(distinct ro.id) no_result_fair from result_organization ro
|
||||||
join result r on r.id=ro.id
|
join result r on r.id=ro.id
|
||||||
--join result_pids rp on r.id=rp.id
|
--join result_pids rp on r.id=rp.id
|
||||||
where (title is not null) and (publisher is not null) and (abstract is true) and (year is not null) and (authors>0) and year>2003
|
where (title is not null) and (publisher is not null) and (abstract is true) and (year is not null) and (authors>0) and cast(year as int)>2003
|
||||||
group by ro.organization),
|
group by ro.organization),
|
||||||
--return all results group by organization
|
--return all results group by organization
|
||||||
allresults as (select organization, count(distinct ro.id) no_allresults from result_organization ro
|
allresults as (select organization, count(distinct ro.id) no_allresults from result_organization ro
|
||||||
join result r on r.id=ro.id
|
join result r on r.id=ro.id
|
||||||
where year>2003
|
where cast(year as int)>2003
|
||||||
group by organization)
|
group by organization)
|
||||||
--return results_fair/all_results
|
--return results_fair/all_results
|
||||||
select allresults.organization, result_fair.no_result_fair/allresults.no_allresults org_fairness
|
select allresults.organization, result_fair.no_result_fair/allresults.no_allresults org_fairness
|
||||||
|
@ -638,11 +638,11 @@ create table indi_org_fairness_year stored as parquet as
|
||||||
(select year, ro.organization organization, count(distinct ro.id) no_result_fair from result_organization ro
|
(select year, ro.organization organization, count(distinct ro.id) no_result_fair from result_organization ro
|
||||||
join result r on r.id=ro.id
|
join result r on r.id=ro.id
|
||||||
join result_pids rp on r.id=rp.id
|
join result_pids rp on r.id=rp.id
|
||||||
where (title is not null) and (publisher is not null) and (abstract is true) and (year is not null) and (authors>0) and year>2003
|
where (title is not null) and (publisher is not null) and (abstract is true) and (year is not null) and (authors>0) and cast(year as int)>2003
|
||||||
group by ro.organization, year),
|
group by ro.organization, year),
|
||||||
allresults as (select year, organization, count(distinct ro.id) no_allresults from result_organization ro
|
allresults as (select year, organization, count(distinct ro.id) no_allresults from result_organization ro
|
||||||
join result r on r.id=ro.id
|
join result r on r.id=ro.id
|
||||||
where year>2003
|
where cast(year as int)>2003
|
||||||
group by organization, year)
|
group by organization, year)
|
||||||
--return results_fair/all_results
|
--return results_fair/all_results
|
||||||
select allresults.year, allresults.organization, result_fair.no_result_fair/allresults.no_allresults org_fairness
|
select allresults.year, allresults.organization, result_fair.no_result_fair/allresults.no_allresults org_fairness
|
||||||
|
@ -657,12 +657,12 @@ create table indi_org_findable_year stored as parquet as
|
||||||
(select year, ro.organization organization, count(distinct rp.id) no_result_with_pid from result_organization ro
|
(select year, ro.organization organization, count(distinct rp.id) no_result_with_pid from result_organization ro
|
||||||
join result_pids rp on rp.id=ro.id
|
join result_pids rp on rp.id=ro.id
|
||||||
join result r on r.id=rp.id
|
join result r on r.id=rp.id
|
||||||
where year >2003
|
where cast(year as int) >2003
|
||||||
group by ro.organization, year),
|
group by ro.organization, year),
|
||||||
--return all results group by organization,year
|
--return all results group by organization,year
|
||||||
allresults as (select year, organization, count(distinct ro.id) no_allresults from result_organization ro
|
allresults as (select year, organization, count(distinct ro.id) no_allresults from result_organization ro
|
||||||
join result r on r.id=ro.id
|
join result r on r.id=ro.id
|
||||||
where year >2003
|
where cast(year as int) >2003
|
||||||
group by organization, year)
|
group by organization, year)
|
||||||
--return results_with_pid/all_results
|
--return results_with_pid/all_results
|
||||||
select allresults.year, allresults.organization, result_with_pid.no_result_with_pid/allresults.no_allresults org_findable
|
select allresults.year, allresults.organization, result_with_pid.no_result_with_pid/allresults.no_allresults org_findable
|
||||||
|
@ -677,12 +677,12 @@ create table indi_org_findable stored as parquet as
|
||||||
(select ro.organization organization, count(distinct rp.id) no_result_with_pid from result_organization ro
|
(select ro.organization organization, count(distinct rp.id) no_result_with_pid from result_organization ro
|
||||||
join result_pids rp on rp.id=ro.id
|
join result_pids rp on rp.id=ro.id
|
||||||
join result r on r.id=rp.id
|
join result r on r.id=rp.id
|
||||||
where year >2003
|
where cast(year as int) >2003
|
||||||
group by ro.organization),
|
group by ro.organization),
|
||||||
--return all results group by organization
|
--return all results group by organization
|
||||||
allresults as (select organization, count(distinct ro.id) no_allresults from result_organization ro
|
allresults as (select organization, count(distinct ro.id) no_allresults from result_organization ro
|
||||||
join result r on r.id=ro.id
|
join result r on r.id=ro.id
|
||||||
where year >2003
|
where cast(year as int) >2003
|
||||||
group by organization)
|
group by organization)
|
||||||
--return results_with_pid/all_results
|
--return results_with_pid/all_results
|
||||||
select allresults.organization, result_with_pid.no_result_with_pid/allresults.no_allresults org_findable
|
select allresults.organization, result_with_pid.no_result_with_pid/allresults.no_allresults org_findable
|
||||||
|
|
|
@ -3,20 +3,20 @@
|
||||||
----------------------------------------------------
|
----------------------------------------------------
|
||||||
|
|
||||||
-- Peer reviewed:
|
-- Peer reviewed:
|
||||||
create table ${stats_db_name}.result_peerreviewed STORED AS PARQUET as
|
create table IF NOT EXISTS ${stats_db_name}.result_peerreviewed STORED AS PARQUET as
|
||||||
select r.id as id, case when doi.doi_from_crossref=1 and grey.grey_lit=0 then true else false end as peer_reviewed
|
select r.id as id, case when doi.doi_from_crossref=1 and grey.grey_lit=0 then true else false end as peer_reviewed
|
||||||
from ${stats_db_name}.result r
|
from ${stats_db_name}.result r
|
||||||
left outer join ${stats_db_name}.indi_pub_doi_from_crossref doi on doi.id=r.id
|
left outer join ${stats_db_name}.indi_pub_doi_from_crossref doi on doi.id=r.id
|
||||||
left outer join ${stats_db_name}.indi_pub_grey_lit grey on grey.id=r.id;
|
left outer join ${stats_db_name}.indi_pub_grey_lit grey on grey.id=r.id;
|
||||||
|
|
||||||
-- Green OA:
|
-- Green OA:
|
||||||
create table ${stats_db_name}.result_greenoa STORED AS PARQUET as
|
create table IF NOT EXISTS ${stats_db_name}.result_greenoa STORED AS PARQUET as
|
||||||
select r.id, case when green.green_oa=1 then true else false end as green
|
select r.id, case when green.green_oa=1 then true else false end as green
|
||||||
from ${stats_db_name}.result r
|
from ${stats_db_name}.result r
|
||||||
left outer join ${stats_db_name}.indi_pub_green_oa green on green.id=r.id;
|
left outer join ${stats_db_name}.indi_pub_green_oa green on green.id=r.id;
|
||||||
|
|
||||||
-- GOLD OA:
|
-- GOLD OA:
|
||||||
create table ${stats_db_name}.result_gold STORED AS PARQUET as
|
create table IF NOT EXISTS ${stats_db_name}.result_gold STORED AS PARQUET as
|
||||||
select r.id, case when gold.is_gold=1 then true else false end as gold
|
select r.id, case when gold.is_gold=1 then true else false end as gold
|
||||||
from ${stats_db_name}.result r
|
from ${stats_db_name}.result r
|
||||||
left outer join ${stats_db_name}.indi_pub_gold_oa gold on gold.id=r.id;
|
left outer join ${stats_db_name}.indi_pub_gold_oa gold on gold.id=r.id;
|
|
@ -45,7 +45,10 @@ create table TARGET.result stored as parquet as
|
||||||
'openorgs____::15e7921fc50d9aa1229a82a84429419e', -- University Of Thessaly
|
'openorgs____::15e7921fc50d9aa1229a82a84429419e', -- University Of Thessaly
|
||||||
'openorgs____::11f7919dadc8f8a7251af54bba60c956', -- Technical University of Crete
|
'openorgs____::11f7919dadc8f8a7251af54bba60c956', -- Technical University of Crete
|
||||||
'openorgs____::84f0c5f5dbb6daf42748485924efde4b', -- University of Piraeus
|
'openorgs____::84f0c5f5dbb6daf42748485924efde4b', -- University of Piraeus
|
||||||
'openorgs____::4ac562f0376fce3539504567649cb373' -- University of Patras
|
'openorgs____::4ac562f0376fce3539504567649cb373', -- University of Patras
|
||||||
|
'openorgs____::3e8d1f8c3f6cd7f418b09f1f58b4873b', -- Aristotle University of Thessaloniki
|
||||||
|
'openorgs____::3fcef6e1c469c10f2a84b281372c9814', -- World Bank
|
||||||
|
'openorgs____::1698a2eb1885ef8adb5a4a969e745ad3' -- École des Ponts ParisTech
|
||||||
) )) foo;
|
) )) foo;
|
||||||
compute stats TARGET.result;
|
compute stats TARGET.result;
|
||||||
|
|
||||||
|
@ -159,10 +162,10 @@ create table TARGET.indi_pub_doi_from_crossref stored as parquet as select * fro
|
||||||
compute stats TARGET.indi_pub_doi_from_crossref;
|
compute stats TARGET.indi_pub_doi_from_crossref;
|
||||||
create table TARGET.indi_pub_gold_oa stored as parquet as select * from SOURCE.indi_pub_gold_oa orig where exists (select 1 from TARGET.result r where r.id=orig.id);
|
create table TARGET.indi_pub_gold_oa stored as parquet as select * from SOURCE.indi_pub_gold_oa orig where exists (select 1 from TARGET.result r where r.id=orig.id);
|
||||||
compute stats TARGET.indi_pub_gold_oa;
|
compute stats TARGET.indi_pub_gold_oa;
|
||||||
create table TARGET.indi_datasets_gold_oa stored as parquet as select * from SOURCE.indi_datasets_gold_oa orig where exists (select 1 from TARGET.result r where r.id=orig.id);
|
--create table TARGET.indi_datasets_gold_oa stored as parquet as select * from SOURCE.indi_datasets_gold_oa orig where exists (select 1 from TARGET.result r where r.id=orig.id);
|
||||||
compute stats TARGET.indi_datasets_gold_oa;
|
--compute stats TARGET.indi_datasets_gold_oa;
|
||||||
create table TARGET.indi_software_gold_oa stored as parquet as select * from SOURCE.indi_software_gold_oa orig where exists (select 1 from TARGET.result r where r.id=orig.id);
|
--create table TARGET.indi_software_gold_oa stored as parquet as select * from SOURCE.indi_software_gold_oa orig where exists (select 1 from TARGET.result r where r.id=orig.id);
|
||||||
compute stats TARGET.indi_software_gold_oa;
|
--compute stats TARGET.indi_software_gold_oa;
|
||||||
create table TARGET.indi_pub_has_abstract stored as parquet as select * from SOURCE.indi_pub_has_abstract orig where exists (select 1 from TARGET.result r where r.id=orig.id);
|
create table TARGET.indi_pub_has_abstract stored as parquet as select * from SOURCE.indi_pub_has_abstract orig where exists (select 1 from TARGET.result r where r.id=orig.id);
|
||||||
compute stats TARGET.indi_pub_has_abstract;
|
compute stats TARGET.indi_pub_has_abstract;
|
||||||
create table TARGET.indi_result_has_cc_licence stored as parquet as select * from SOURCE.indi_result_has_cc_licence orig where exists (select 1 from TARGET.result r where r.id=orig.id);
|
create table TARGET.indi_result_has_cc_licence stored as parquet as select * from SOURCE.indi_result_has_cc_licence orig where exists (select 1 from TARGET.result r where r.id=orig.id);
|
||||||
|
|
Loading…
Reference in New Issue