code cleanup

This commit is contained in:
Antonis Lempesis 2024-03-22 10:16:49 +02:00
parent 459167ac2f
commit 4c40c96e30
1 changed files with 181 additions and 237 deletions

View File

@ -1,204 +1,154 @@
-- Sprint 1 ----
drop table if exists ${stats_db_name}.indi_pub_green_oa purge; /*EOS*/
--create table if not exists ${stats_db_name}.indi_pub_green_oa stored as parquet as
--select distinct p.id, coalesce(green_oa, 0) as green_oa
--from ${stats_db_name}.publication p
-- left outer join (
-- select p.id, 1 as green_oa
-- from ${stats_db_name}.publication p
-- join ${stats_db_name}.result_instance ri on ri.id = p.id
-- join ${stats_db_name}.datasource on datasource.id = ri.hostedby
-- where datasource.type like '%Repository%'
-- and (ri.accessright = 'Open Access'
-- or ri.accessright = 'Embargo' or ri.accessright = 'Open Source')) tmp
-- on p.id= tmp.id;
create table if not exists ${stats_db_name}.indi_pub_green_oa stored as parquet as
select distinct p.id, coalesce(green_oa, 0) as green_oa
from ${stats_db_name}.publication p
left outer join (
left outer join (
select p.id, 1 as green_oa
from ${stats_db_name}.publication p
join ${stats_db_name}.result_instance ri on ri.id = p.id
join ${stats_db_name}.datasource on datasource.id = ri.hostedby
where datasource.type like '%Repository%'
and (ri.accessright = 'Open Access'
or ri.accessright = 'Embargo' or ri.accessright = 'Open Source') and datasource.name!='Other') tmp
on p.id= tmp.id; /*EOS*/
where datasource.type like '%Repository%' and (ri.accessright = 'Open Access' or ri.accessright = 'Embargo' or ri.accessright = 'Open Source') and datasource.name!='Other') tmp on p.id= tmp.id; /*EOS*/
drop table if exists ${stats_db_name}.indi_pub_grey_lit purge; /*EOS*/
create table if not exists ${stats_db_name}.indi_pub_grey_lit stored as parquet as
select distinct p.id, coalesce(grey_lit, 0) as grey_lit
from ${stats_db_name}.publication p
left outer join (
left outer join (
select p.id, 1 as grey_lit
from ${stats_db_name}.publication p
join ${stats_db_name}.result_classifications rt on rt.id = p.id
where rt.type not in ('Article','Part of book or chapter of book','Book','Doctoral thesis','Master thesis','Data Paper', 'Thesis', 'Bachelor thesis', 'Conference object') and
not exists (select 1 from ${stats_db_name}.result_classifications rc where type ='Other literature type'
and rc.id=p.id)) tmp on p.id=tmp.id; /*EOS*/
where rt.type not in ('Article','Part of book or chapter of book','Book','Doctoral thesis','Master thesis','Data Paper', 'Thesis', 'Bachelor thesis', 'Conference object')
and not exists (select 1 from ${stats_db_name}.result_classifications rc where type ='Other literature type' and rc.id=p.id)) tmp on p.id=tmp.id; /*EOS*/
drop table if exists ${stats_db_name}.indi_pub_doi_from_crossref purge; /*EOS*/
create table if not exists ${stats_db_name}.indi_pub_doi_from_crossref stored as parquet as
select distinct p.id, coalesce(doi_from_crossref, 0) as doi_from_crossref
from ${stats_db_name}.publication p
left outer join
(select ri.id, 1 as doi_from_crossref from ${stats_db_name}.result_instance ri
left outer join (
select ri.id, 1 as doi_from_crossref from ${stats_db_name}.result_instance ri
join ${stats_db_name}.datasource d on d.id = ri.collectedfrom
where pidtype='Digital Object Identifier' and d.name ='Crossref') tmp
on tmp.id=p.id; /*EOS*/
where pidtype='Digital Object Identifier' and d.name ='Crossref') tmp on tmp.id=p.id; /*EOS*/
-- Sprint 2 ----
drop table if exists ${stats_db_name}.indi_result_has_cc_licence purge; /*EOS*/
create table if not exists ${stats_db_name}.indi_result_has_cc_licence stored as parquet as
select distinct r.id, (case when lic='' or lic is null then 0 else 1 end) as has_cc_license
from ${stats_db_name}.result r
left outer join (select r.id, license.type as lic from ${stats_db_name}.result r
left outer join (
select r.id, license.type as lic from ${stats_db_name}.result r
join ${stats_db_name}.result_licenses as license on license.id = r.id
where lower(license.type) LIKE '%creativecommons.org%' OR lower(license.type) LIKE '%cc %') tmp
on r.id= tmp.id; /*EOS*/
where lower(license.type) LIKE '%creativecommons.org%' OR lower(license.type) LIKE '%cc %') tmp on r.id= tmp.id; /*EOS*/
drop table if exists ${stats_db_name}.indi_result_has_cc_licence_url purge; /*EOS*/
create table if not exists ${stats_db_name}.indi_result_has_cc_licence_url stored as parquet as
select distinct r.id, case when lic_host='' or lic_host is null then 0 else 1 end as has_cc_license_url
from ${stats_db_name}.result r
left outer join (select r.id, lower(parse_url(license.type, "HOST")) as lic_host
left outer join (
select r.id, lower(parse_url(license.type, "HOST")) as lic_host
from ${stats_db_name}.result r
join ${stats_db_name}.result_licenses as license on license.id = r.id
WHERE lower(parse_url(license.type, "HOST")) = "creativecommons.org") tmp
on r.id= tmp.id; /*EOS*/
WHERE lower(parse_url(license.type, "HOST")) = "creativecommons.org") tmp on r.id= tmp.id; /*EOS*/
drop table if exists ${stats_db_name}.indi_pub_has_abstract purge; /*EOS*/
create table if not exists ${stats_db_name}.indi_pub_has_abstract stored as parquet as
select distinct publication.id, cast(coalesce(abstract, true) as int) has_abstract
from ${stats_db_name}.publication; /*EOS*/
drop table if exists ${stats_db_name}.indi_result_with_orcid purge; /*EOS*/
create table if not exists ${stats_db_name}.indi_result_with_orcid stored as parquet as
select distinct r.id, coalesce(has_orcid, 0) as has_orcid
from ${stats_db_name}.result r
left outer join (select id, 1 as has_orcid from ${stats_db_name}.result_orcid) tmp
on r.id= tmp.id; /*EOS*/
left outer join (
select id, 1 as has_orcid from ${stats_db_name}.result_orcid) tmp on r.id= tmp.id; /*EOS*/
---- Sprint 3 ----
drop table if exists ${stats_db_name}.indi_funded_result_with_fundref purge; /*EOS*/
create table if not exists ${stats_db_name}.indi_funded_result_with_fundref stored as parquet as
select distinct r.result as id, coalesce(fundref, 0) as fundref
from ${stats_db_name}.project_results r
left outer join (select distinct result, 1 as fundref from ${stats_db_name}.project_results
where provenance='Harvested') tmp
on r.result= tmp.result; /*EOS*/
-- create table indi_result_org_collab stored as parquet as
-- select o1.organization org1, o2.organization org2, count(distinct o1.id) as collaborations
-- from result_organization as o1
-- join result_organization as o2 on o1.id=o2.id and o1.organization!=o2.organization
-- group by o1.organization, o2.organization;
--
-- compute stats indi_result_org_collab;
--
create TEMPORARY VIEW tmp AS SELECT ro.organization organization, ro.id, o.name from ${stats_db_name}.result_organization ro
join ${stats_db_name}.organization o on o.id=ro.organization where o.name is not null; /*EOS*/
left outer join (
select distinct result, 1 as fundref from ${stats_db_name}.project_results where provenance='Harvested') tmp on r.result= tmp.result; /*EOS*/
drop table if exists ${stats_db_name}.indi_result_org_collab purge; /*EOS*/
create table if not exists ${stats_db_name}.indi_result_org_collab stored as parquet as
select o1.organization org1, o1.name org1name1, o2.organization org2, o2.name org2name2, count(o1.id) as collaborations
from tmp as o1
join tmp as o2 where o1.id=o2.id and o1.organization!=o2.organization and o1.name!=o2.name
group by o1.organization, o2.organization, o1.name, o2.name; /*EOS*/
DROP VIEW if exists tmp; /*EOS*/
create TEMPORARY VIEW tmp AS
select distinct ro.organization organization, ro.id, o.name, o.country from ${stats_db_name}.result_organization ro
join ${stats_db_name}.organization o on o.id=ro.organization where country <> 'UNKNOWN' and o.name is not null; /*EOS*/
WITH tmp AS (
SELECT ro.organization organization, ro.id, o.name
from ${stats_db_name}.result_organization ro
join ${stats_db_name}.organization o on o.id=ro.organization where o.name is not null)
select o1.organization org1, o1.name org1name1, o2.organization org2, o2.name org2name2, count(o1.id) as collaborations
from tmp as o1
join tmp as o2 where o1.id=o2.id and o1.organization!=o2.organization and o1.name!=o2.name
group by o1.organization, o2.organization, o1.name, o2.name; /*EOS*/
drop table if exists ${stats_db_name}.indi_result_org_country_collab purge; /*EOS*/
create table if not exists ${stats_db_name}.indi_result_org_country_collab stored as parquet as
select o1.organization org1,o1.name org1name1, o2.country country2, count(o1.id) as collaborations
from tmp as o1 join tmp as o2 on o1.id=o2.id
where o1.id=o2.id and o1.country!=o2.country
group by o1.organization, o1.id, o1.name, o2.country; /*EOS*/
DROP VIEW if exists tmp; /*EOS*/
create TEMPORARY VIEW tmp AS
select o.id organization, o.name, ro.project as project from ${stats_db_name}.organization o
join ${stats_db_name}.organization_projects ro on o.id=ro.id where o.name is not null; /*EOS*/
WITH tmp AS (
select distinct ro.organization organization, ro.id, o.name, o.country
from ${stats_db_name}.result_organization ro
join ${stats_db_name}.organization o on o.id=ro.organization
where country <> 'UNKNOWN' and o.name is not null)
select o1.organization org1,o1.name org1name1, o2.country country2, count(o1.id) as collaborations
from tmp as o1 join tmp as o2 on o1.id=o2.id
where o1.id=o2.id and o1.country!=o2.country
group by o1.organization, o1.id, o1.name, o2.country; /*EOS*/
drop table if exists ${stats_db_name}.indi_project_collab_org purge; /*EOS*/
create table if not exists ${stats_db_name}.indi_project_collab_org stored as parquet as
select o1.organization org1,o1.name orgname1, o2.organization org2, o2.name orgname2, count(distinct o1.project) as collaborations
from tmp as o1
WITH tmp AS (
select o.id organization, o.name, ro.project as project
from ${stats_db_name}.organization o
join ${stats_db_name}.organization_projects ro on o.id=ro.id where o.name is not null)
select o1.organization org1,o1.name orgname1, o2.organization org2, o2.name orgname2, count(distinct o1.project) as collaborations
from tmp as o1
join tmp as o2 on o1.project=o2.project
where o1.organization<>o2.organization and o1.name<>o2.name
group by o1.name,o2.name, o1.organization, o2.organization; /*EOS*/
DROP VIEW if exists tmp; /*EOS*/
create TEMPORARY VIEW tmp AS
select o.id organization, o.name, o.country , ro.project as project from ${stats_db_name}.organization o
join ${stats_db_name}.organization_projects ro on o.id=ro.id
and o.country <> 'UNKNOWN' and o.name is not null; /*EOS*/
where o1.organization<>o2.organization and o1.name<>o2.name
group by o1.name,o2.name, o1.organization, o2.organization; /*EOS*/
drop table if exists ${stats_db_name}.indi_project_collab_org_country purge; /*EOS*/
create table if not exists ${stats_db_name}.indi_project_collab_org_country stored as parquet as
select o1.organization org1,o1.name org1name, o2.country country2, count(distinct o1.project) as collaborations
from tmp as o1
WITH tmp AS (
select o.id organization, o.name, o.country , ro.project as project
from ${stats_db_name}.organization o
join ${stats_db_name}.organization_projects ro on o.id=ro.id and o.country <> 'UNKNOWN' and o.name is not null)
select o1.organization org1,o1.name org1name, o2.country country2, count(distinct o1.project) as collaborations
from tmp as o1
join tmp as o2 on o1.project=o2.project
where o1.organization<>o2.organization and o1.country<>o2.country
group by o1.organization, o2.country, o1.name; /*EOS*/
DROP VIEW if exists tmp; /*EOS*/
where o1.organization<>o2.organization and o1.country<>o2.country
group by o1.organization, o2.country, o1.name; /*EOS*/
drop table if exists ${stats_db_name}.indi_funder_country_collab purge; /*EOS*/
create table if not exists ${stats_db_name}.indi_funder_country_collab stored as parquet as
with tmp as (select funder, project, country from ${stats_db_name}.organization_projects op
join ${stats_db_name}.organization o on o.id=op.id
join ${stats_db_name}.project p on p.id=op.project
where country <> 'UNKNOWN')
select f1.funder, f1.country as country1, f2.country as country2, count(distinct f1.project) as collaborations
from tmp as f1
select f1.funder, f1.country as country1, f2.country as country2, count(distinct f1.project) as collaborations
from tmp as f1
join tmp as f2 on f1.project=f2.project
where f1.country<>f2.country
group by f1.funder, f2.country, f1.country; /*EOS*/
create TEMPORARY VIEW tmp AS
select distinct country, ro.id as result from ${stats_db_name}.organization o
join ${stats_db_name}.result_organization ro on o.id=ro.organization
where country <> 'UNKNOWN' and o.name is not null; /*EOS*/
where f1.country<>f2.country
group by f1.funder, f2.country, f1.country; /*EOS*/
drop table if exists ${stats_db_name}.indi_result_country_collab purge; /*EOS*/
create table if not exists ${stats_db_name}.indi_result_country_collab stored as parquet as
select o1.country country1, o2.country country2, count(o1.result) as collaborations
from tmp as o1
WITH tmp AS (
select distinct country, ro.id as result from ${stats_db_name}.organization o
join ${stats_db_name}.result_organization ro on o.id=ro.organization
where country <> 'UNKNOWN' and o.name is not null)
select o1.country country1, o2.country country2, count(o1.result) as collaborations
from tmp as o1
join tmp as o2 on o1.result=o2.result
where o1.country<>o2.country
group by o1.country, o2.country; /*EOS*/
where o1.country<>o2.country
group by o1.country, o2.country; /*EOS*/
DROP VIEW if exists tmp; /*EOS*/
---- Sprint 4 ----
drop table if exists ${stats_db_name}.indi_pub_diamond purge; /*EOS*/
create table if not exists ${stats_db_name}.indi_pub_diamond stored as parquet as
select distinct pd.id, coalesce(in_diamond_journal, 0) as in_diamond_journal
from ${stats_db_name}.publication_datasources pd
left outer join (
select distinct pd.id, coalesce(in_diamond_journal, 0) as in_diamond_journal
from ${stats_db_name}.publication_datasources pd
left outer join (
select pd.id, 1 as in_diamond_journal
from ${stats_db_name}.publication_datasources pd
join ${stats_db_name}.datasource d on d.id=pd.datasource
@ -206,11 +156,10 @@ left outer join (
and (ps.journal_is_in_doaj=true or ps.journal_is_oa=true) and ps.has_apc=false) tmp on pd.id=tmp.id; /*EOS*/
drop table if exists ${stats_db_name}.indi_pub_in_transformative purge; /*EOS*/
create table if not exists ${stats_db_name}.indi_pub_in_transformative stored as parquet as
select distinct pd.id, coalesce(is_transformative, 0) as is_transformative
from ${stats_db_name}.publication pd
left outer join (
select distinct pd.id, coalesce(is_transformative, 0) as is_transformative
from ${stats_db_name}.publication pd
left outer join (
select pd.id, 1 as is_transformative
from ${stats_db_name}.publication_datasources pd
join ${stats_db_name}.datasource d on d.id=pd.datasource
@ -218,11 +167,10 @@ left outer join (
and ps.is_transformative_journal=true) tmp on pd.id=tmp.id; /*EOS*/
drop table if exists ${stats_db_name}.indi_pub_closed_other_open purge; /*EOS*/
create table if not exists ${stats_db_name}.indi_pub_closed_other_open stored as parquet as
select distinct ri.id, coalesce(pub_closed_other_open, 0) as pub_closed_other_open
from ${stats_db_name}.result_instance ri
left outer join (
select distinct ri.id, coalesce(pub_closed_other_open, 0) as pub_closed_other_open
from ${stats_db_name}.result_instance ri
left outer join (
select ri.id, 1 as pub_closed_other_open
from ${stats_db_name}.result_instance ri
join ${stats_db_name}.publication p on p.id=ri.id
@ -230,55 +178,52 @@ left outer join (
where d.type like '%Journal%' and ri.accessright='Closed Access' and
(p.bestlicence='Open Access' or p.bestlicence='Open Source')) tmp on tmp.id=ri.id; /*EOS*/
---- Sprint 5 ----
drop table if exists ${stats_db_name}.indi_result_no_of_copies purge; /*EOS*/
create table if not exists ${stats_db_name}.indi_result_no_of_copies stored as parquet as
select id, count(id) as number_of_copies from ${stats_db_name}.result_instance group by id; /*EOS*/
select id, count(id) as number_of_copies
from ${stats_db_name}.result_instance
group by id; /*EOS*/
---- Sprint 6 ----
drop table if exists ${stats_db_name}.indi_pub_downloads purge; /*EOS*/
create table if not exists ${stats_db_name}.indi_pub_downloads stored as parquet as
SELECT result_id, sum(downloads) no_downloads from openaire_prod_usage_stats.usage_stats
SELECT result_id, sum(downloads) no_downloads
from openaire_prod_usage_stats.usage_stats
join ${stats_db_name}.publication on result_id=id
where downloads>0
GROUP BY result_id
order by no_downloads desc; /*EOS*/
--ANALYZE TABLE ${stats_db_name}.indi_pub_downloads COMPUTE STATISTICS;
where downloads>0
GROUP BY result_id; /*EOS*/
drop table if exists ${stats_db_name}.indi_pub_downloads_datasource purge; /*EOS*/
create table if not exists ${stats_db_name}.indi_pub_downloads_datasource stored as parquet as
SELECT result_id, repository_id, sum(downloads) no_downloads from openaire_prod_usage_stats.usage_stats
SELECT result_id, repository_id, sum(downloads) no_downloads
from openaire_prod_usage_stats.usage_stats
join ${stats_db_name}.publication on result_id=id
where downloads>0
GROUP BY result_id, repository_id
order by result_id; /*EOS*/
where downloads>0
GROUP BY result_id, repository_id; /*EOS*/
drop table if exists ${stats_db_name}.indi_pub_downloads_year purge; /*EOS*/
create table if not exists ${stats_db_name}.indi_pub_downloads_year stored as parquet as
SELECT result_id, cast(substring(us.`date`, 1,4) as int) as `year`, sum(downloads) no_downloads
from openaire_prod_usage_stats.usage_stats us
join ${stats_db_name}.publication on result_id=id where downloads>0
GROUP BY result_id, substring(us.`date`, 1,4); /*EOS*/
SELECT result_id, cast(substring(us.`date`, 1,4) as int) as `year`, sum(downloads) no_downloads
from openaire_prod_usage_stats.usage_stats us
join ${stats_db_name}.publication on result_id=id where downloads>0
GROUP BY result_id, substring(us.`date`, 1,4); /*EOS*/
drop table if exists ${stats_db_name}.indi_pub_downloads_datasource_year purge; /*EOS*/
create table if not exists ${stats_db_name}.indi_pub_downloads_datasource_year stored as parquet as
SELECT result_id, cast(substring(us.`date`, 1,4) as int) as `year`, repository_id, sum(downloads) no_downloads from openaire_prod_usage_stats.usage_stats us
join ${stats_db_name}.publication on result_id=id
where downloads>0
GROUP BY result_id, repository_id, substring(us.`date`, 1,4); /*EOS*/
SELECT result_id, cast(substring(us.`date`, 1,4) as int) as `year`, repository_id, sum(downloads) no_downloads
from openaire_prod_usage_stats.usage_stats us
join ${stats_db_name}.publication on result_id=id
where downloads>0
GROUP BY result_id, repository_id, substring(us.`date`, 1,4); /*EOS*/
---- Sprint 7 ----
drop table if exists ${stats_db_name}.indi_pub_gold_oa purge; /*EOS*/
create table if not exists ${stats_db_name}.indi_pub_gold_oa stored as parquet as
with gold_oa as (
select distinct issn from (
with gold_oa as (
select distinct issn from (
SELECT issn_l as issn from stats_ext.issn_gold_oa_dataset_v5
UNION ALL
SELECT issn as issn from stats_ext.issn_gold_oa_dataset_v5
@ -286,8 +231,8 @@ select distinct issn from (
select issn from stats_ext.alljournals where journal_is_in_doaj=true or journal_is_oa=true
UNION ALL
select issn_l as issn from stats_ext.alljournals where journal_is_in_doaj=true or journal_is_oa=true) foo),
dd as (
select distinct * from (
dd as (
select distinct * from (
select id, issn_printed as issn from ${stats_db_name}.datasource d where d.id like '%doajarticles%'
UNION ALL
select id, issn_online as issn from ${stats_db_name}.datasource d where d.id like '%doajarticles%'
@ -295,17 +240,16 @@ select distinct * from (
select id, issn_printed as issn from ${stats_db_name}.datasource d join gold_oa on gold_oa.issn=d.issn_printed
UNION ALL
select id, issn_online as issn from ${stats_db_name}.datasource d join gold_oa on gold_oa.issn=d.issn_online) foo
)
SELECT DISTINCT pd.id, coalesce(is_gold, 0) as is_gold
FROM ${stats_db_name}.publication_datasources pd
left outer join (
)
SELECT DISTINCT pd.id, coalesce(is_gold, 0) as is_gold
FROM ${stats_db_name}.publication_datasources pd
left outer join (
select pd.id, 1 as is_gold
FROM ${stats_db_name}.publication_datasources pd
join dd on dd.id=pd.datasource
left outer join ${stats_db_name}.result_accessroute ra on ra.id = pd.id where ra.accessroute = 'gold') tmp on tmp.id=pd.id; /*EOS*/
drop table if exists ${stats_db_name}.indi_pub_hybrid_oa_with_cc purge; /*EOS*/
create table if not exists ${stats_db_name}.indi_pub_hybrid_oa_with_cc stored as parquet as
WITH hybrid_oa AS (
SELECT issn_l, journal_is_in_doaj, journal_is_oa, issn_print as issn
@ -326,9 +270,9 @@ create table if not exists ${stats_db_name}.indi_pub_hybrid_oa_with_cc stored as
FROM ${stats_db_name}.datasource
WHERE issn_online IS NOT NULL ) as issn
WHERE LENGTH(issn) > 7)
SELECT DISTINCT pd.id, coalesce(is_hybrid_oa, 0) as is_hybrid_oa
FROM ${stats_db_name}.publication_datasources pd
LEFT OUTER JOIN (
SELECT DISTINCT pd.id, coalesce(is_hybrid_oa, 0) as is_hybrid_oa
FROM ${stats_db_name}.publication_datasources pd
LEFT OUTER JOIN (
SELECT pd.id, 1 as is_hybrid_oa from ${stats_db_name}.publication_datasources pd
JOIN ${stats_db_name}.datasource d on d.id=pd.datasource
JOIN issn on issn.id=pd.datasource
@ -337,7 +281,6 @@ LEFT OUTER JOIN (
JOIN ${stats_db_name}.indi_pub_gold_oa ga on pd.id=ga.id where cc.has_cc_license=1 and ga.is_gold=0) tmp on pd.id=tmp.id; /*EOS*/
drop table if exists ${stats_db_name}.indi_pub_hybrid purge; /*EOS*/
create table if not exists ${stats_db_name}.indi_pub_hybrid stored as parquet as
select distinct pd.id,coalesce(is_hybrid,0) is_hybrid from ${stats_db_name}.publication pd
left outer join (
@ -349,23 +292,24 @@ left outer join (
where indi_gold.is_gold=0 and ((d.type like '%Journal%' and ri.accessright!='Closed Access' and ri.accessright!='Restricted' and ri.license is not null) or ra.accessroute='hybrid')) tmp on pd.id=tmp.id; /*EOS*/
drop table if exists ${stats_db_name}.indi_org_fairness purge; /*EOS*/
create table if not exists ${stats_db_name}.indi_org_fairness stored as parquet as
--return results with PIDs, and rich metadata group by organization
with result_fair as
(select ro.organization organization, count(distinct ro.id) no_result_fair from ${stats_db_name}.result_organization ro
with result_fair as (
select ro.organization organization, count(distinct ro.id) no_result_fair
from ${stats_db_name}.result_organization ro
join ${stats_db_name}.result r on r.id=ro.id
--join result_pids rp on r.id=rp.id
where (title is not null) and (publisher is not null) and (abstract=true) and (year is not null) and (authors>0) and cast(year as int)>2003
group by ro.organization),
--return all results group by organization
allresults as (select ro.organization, count(distinct ro.id) no_allresults from ${stats_db_name}.result_organization ro
allresults as (
select ro.organization, count(distinct ro.id) no_allresults from ${stats_db_name}.result_organization ro
join ${stats_db_name}.result r on r.id=ro.id
where cast(year as int)>2003
group by ro.organization)
--return results_fair/all_results
select allresults.organization, result_fair.no_result_fair/allresults.no_allresults org_fairness
from allresults
select allresults.organization, result_fair.no_result_fair/allresults.no_allresults org_fairness
from allresults
join result_fair on result_fair.organization=allresults.organization; /*EOS*/
CREATE TEMPORARY VIEW result_fair as