@ -1,390 +1,255 @@
-- Sprint 1 ----
drop table if exists $ { stats_db_name } . indi_pub_green_oa purge ; /* EOS */
- - create table if not exists $ { stats_db_name } . indi_pub_green_oa stored as parquet as
- - select distinct p . id , coalesce ( green_oa , 0 ) as green_oa
- - from $ { stats_db_name } . publication p
-- left outer join (
-- select p.id, 1 as green_oa
-- from ${stats_db_name}.publication p
-- join ${stats_db_name}.result_instance ri on ri.id = p.id
-- join ${stats_db_name}.datasource on datasource.id = ri.hostedby
-- where datasource.type like '%Repository%'
-- and (ri.accessright = 'Open Access'
-- or ri.accessright = 'Embargo' or ri.accessright = 'Open Source')) tmp
-- on p.id= tmp.id;
create table if not exists $ { stats_db_name } . indi_pub_green_oa stored as parquet as
select distinct p . id , coalesce ( green_oa , 0 ) as green_oa
from $ { stats_db_name } . publication p
left outer join (
left outer join (
select p . id , 1 as green_oa
from $ { stats_db_name } . publication p
join $ { stats_db_name } . result_instance ri on ri . id = p . id
join $ { stats_db_name } . datasource on datasource . id = ri . hostedby
where datasource . type like ' %Repository% '
and ( ri . accessright = ' Open Access '
or ri . accessright = ' Embargo ' or ri . accessright = ' Open Source ' ) and datasource . name ! = ' Other ' ) tmp
on p . id = tmp . id ; /* EOS */
join $ { stats_db_name } . result_instance ri on ri . id = p . id
join $ { stats_db_name } . datasource on datasource . id = ri . hostedby
where datasource . type like ' %Repository% ' and ( ri . accessright = ' Open Access ' or ri . accessright = ' Embargo ' or ri . accessright = ' Open Source ' ) and datasource . name ! = ' Other ' ) tmp on p . id = tmp . id ; /* EOS */
drop table if exists $ { stats_db_name } . indi_pub_grey_lit purge ; /* EOS */
create table if not exists $ { stats_db_name } . indi_pub_grey_lit stored as parquet as
select distinct p . id , coalesce ( grey_lit , 0 ) as grey_lit
from $ { stats_db_name } . publication p
left outer join (
left outer join (
select p . id , 1 as grey_lit
from $ { stats_db_name } . publication p
join $ { stats_db_name } . result_classifications rt on rt . id = p . id
where rt . type not in ( ' Article ' , ' Part of book or chapter of book ' , ' Book ' , ' Doctoral thesis ' , ' Master thesis ' , ' Data Paper ' , ' Thesis ' , ' Bachelor thesis ' , ' Conference object ' ) and
not exists ( select 1 from $ { stats_db_name } . result_classifications rc where type = ' Other literature type '
and rc . id = p . id ) ) tmp on p . id = tmp . id ; /* EOS */
join $ { stats_db_name } . result_classifications rt on rt . id = p . id
where rt . type not in ( ' Article ' , ' Part of book or chapter of book ' , ' Book ' , ' Doctoral thesis ' , ' Master thesis ' , ' Data Paper ' , ' Thesis ' , ' Bachelor thesis ' , ' Conference object ' )
and not exists ( select 1 from $ { stats_db_name } . result_classifications rc where type = ' Other literature type ' and rc . id = p . id ) ) tmp on p . id = tmp . id ; /* EOS */
drop table if exists $ { stats_db_name } . indi_pub_doi_from_crossref purge ; /* EOS */
create table if not exists $ { stats_db_name } . indi_pub_doi_from_crossref stored as parquet as
select distinct p . id , coalesce ( doi_from_crossref , 0 ) as doi_from_crossref
from $ { stats_db_name } . publication p
left outer join
( select ri . id , 1 as doi_from_crossref from $ { stats_db_name } . result_instance ri
join $ { stats_db_name } . datasource d on d . id = ri . collectedfrom
where pidtype = ' Digital Object Identifier ' and d . name = ' Crossref ' ) tmp
on tmp . id = p . id ; /* EOS */
left outer join (
select ri . id , 1 as doi_from_crossref from $ { stats_db_name } . result_instance ri
join $ { stats_db_name } . datasource d on d . id = ri . collectedfrom
where pidtype = ' Digital Object Identifier ' and d . name = ' Crossref ' ) tmp on tmp . id = p . id ; /* EOS */
-- Sprint 2 ----
drop table if exists $ { stats_db_name } . indi_result_has_cc_licence purge ; /* EOS */
create table if not exists $ { stats_db_name } . indi_result_has_cc_licence stored as parquet as
select distinct r . id , ( case when lic = ' ' or lic is null then 0 else 1 end ) as has_cc_license
from $ { stats_db_name } . result r
left outer join ( select r . id , license . type as lic from $ { stats_db_name } . result r
join $ { stats_db_name } . result_licenses as license on license . id = r . id
where lower ( license . type ) LIKE ' %creativecommons.org% ' OR lower ( license . type ) LIKE ' %cc-% ' ) tmp
on r . id = tmp . id ; /* EOS */
left outer join (
select r . id , license . type as lic from $ { stats_db_name } . result r
join $ { stats_db_name } . result_licenses as license on license . id = r . id
where lower ( license . type ) LIKE ' %creativecommons.org% ' OR lower ( license . type ) LIKE ' %cc % ' ) tmp on r . id = tmp . id ; /* EOS */
drop table if exists $ { stats_db_name } . indi_result_has_cc_licence_url purge ; /* EOS */
create table if not exists $ { stats_db_name } . indi_result_has_cc_licence_url stored as parquet as
select distinct r . id , case when lic_host = ' ' or lic_host is null then 0 else 1 end as has_cc_license_url
from $ { stats_db_name } . result r
left outer join ( select r . id , lower ( parse_url ( license . type , " HOST " ) ) as lic_host
from $ { stats_db_name } . result r
join $ { stats_db_name } . result _licenses as license on license . id = r . id
WHERE lower ( parse_url ( license . type , " HOST " ) ) = " creativecommons.org " ) tmp
on r . id = tmp . id ; /* EOS */
left outer join (
select r . id , lower ( parse_url ( license . type , " HOST " ) ) as lic_host
from $ { stats_db_name } . result r
join $ { stats_db_name } . result_licenses as license on license . id = r . id
WHERE lower ( parse_url ( license . type , " HOST " ) ) = " creativecommons.org " ) tmp on r . id = tmp . id ; /* EOS */
drop table if exists $ { stats_db_name } . indi_pub_has_abstract purge ; /* EOS */
create table if not exists $ { stats_db_name } . indi_pub_has_abstract stored as parquet as
select distinct publication . id , cast ( coalesce ( abstract , true ) as int ) has_abstract
from $ { stats_db_name } . publication ; /* EOS */
drop table if exists $ { stats_db_name } . indi_result_with_orcid purge ; /* EOS */
create table if not exists $ { stats_db_name } . indi_result_with_orcid stored as parquet as
select distinct r . id , coalesce ( has_orcid , 0 ) as has_orcid
from $ { stats_db_name } . result r
left outer join ( select id , 1 as has_orcid from $ { stats_db_name } . result_orcid ) tmp
on r . id = tmp . id ; /* EOS */
left outer join (
select id , 1 as has_orcid from $ { stats_db_name } . result_orcid ) tmp on r . id = tmp . id ; /* EOS */
- - -- Sprint 3 ----
- - -- Sprint 3 ----
drop table if exists $ { stats_db_name } . indi_funded_result_with_fundref purge ; /* EOS */
create table if not exists $ { stats_db_name } . indi_funded_result_with_fundref stored as parquet as
select distinct r . result as id , coalesce ( fundref , 0 ) as fundref
from $ { stats_db_name } . project_results r
left outer join ( select distinct result , 1 as fundref from $ { stats_db_name } . project_results
where provenance = ' Harvested ' ) tmp
on r . result = tmp . result ; /* EOS */
-- create table indi_result_org_collab stored as parquet as
-- select o1.organization org1, o2.organization org2, count(distinct o1.id) as collaborations
-- from result_organization as o1
-- join result_organization as o2 on o1.id=o2.id and o1.organization!=o2.organization
-- group by o1.organization, o2.organization;
- -
-- compute stats indi_result_org_collab;
- -
create TEMPORARY VIEW tmp AS SELECT ro . organization organization , ro . id , o . name from $ { stats_db_name } . result_organization ro
join $ { stats_db_name } . organization o on o . id = ro . organization where o . name is not null ; /* EOS */
left outer join (
select distinct result , 1 as fundref from $ { stats_db_name } . project_results where provenance = ' Harvested ' ) tmp on r . result = tmp . result ; /* EOS */
drop table if exists $ { stats_db_name } . indi_result_org_collab purge ; /* EOS */
create table if not exists $ { stats_db_name } . indi_result_org_collab stored as parquet as
select o1 . organization org1 , o1 . name org1name1 , o2 . organization org2 , o2 . name org2name2 , count ( o1 . id ) as collaborations
from tmp as o1
join tmp as o2 where o1 . id = o2 . id and o1 . organization ! = o2 . organization and o1 . name ! = o2 . name
group by o1 . organization , o2 . organization , o1 . name , o2 . name ; /* EOS */
DROP VIEW if exists tmp ; /* EOS */
create TEMPORARY VIEW tmp AS
select distinct ro . organization organization , ro . id , o . name , o . country from $ { stats_db_name } . result_organization ro
join $ { stats_db_name } . organization o on o . id = ro . organization where country < > ' UNKNOWN ' and o . name is not null ; /* EOS */
WITH tmp AS (
SELECT ro . organization organization , ro . id , o . name
from $ { stats_db_name } . result_organization ro
join $ { stats_db_name } . organization o on o . id = ro . organization where o . name is not null )
select o1 . organization org1 , o1 . name org1name1 , o2 . organization org2 , o2 . name org2name2 , count ( o1 . id ) as collaborations
from tmp as o1
join tmp as o2 where o1 . id = o2 . id and o1 . organization ! = o2 . organization and o1 . name ! = o2 . name
group by o1 . organization , o2 . organization , o1 . name , o2 . name ; /* EOS */
drop table if exists $ { stats_db_name } . indi_result_org_country_collab purge ; /* EOS */
create table if not exists $ { stats_db_name } . indi_result_org_country_collab stored as parquet as
select o1 . organization org1 , o1 . name org1name1 , o2 . country country2 , count ( o1 . id ) as collaborations
from tmp as o1 join tmp as o2 on o1 . id = o2 . id
where o1 . id = o2 . id and o1 . country ! = o2 . country
group by o1 . organization , o1 . id , o1 . name , o2 . country ; /* EOS */
drop table if exists tmp purge ; /* EOS */
create TEMPORARY VIEW tmp AS
select o . id organization , o . name , ro . project as project from $ { stats_db_name } . organization o
join $ { stats_db_name } . organization_projects ro on o . id = ro . id where o . name is not null ; /* EOS */
WITH tmp AS (
select distinct ro . organization organization , ro . id , o . name , o . country
from $ { stats_db_name } . result_organization ro
join $ { stats_db_name } . organization o on o . id = ro . organization
where country < > ' UNKNOWN ' and o . name is not null )
select o1 . organization org1 , o1 . name org1name1 , o2 . country country2 , count ( o1 . id ) as collaborations
from tmp as o1 join tmp as o2 on o1 . id = o2 . id
where o1 . id = o2 . id and o1 . country ! = o2 . country
group by o1 . organization , o1 . id , o1 . name , o2 . country ; /* EOS */
drop table if exists $ { stats_db_name } . indi_project_collab_org purge ; /* EOS */
create table if not exists $ { stats_db_name } . indi_project_collab_org stored as parquet as
select o1 . organization org1 , o1 . name orgname1 , o2 . organization org2 , o2 . name orgname2 , count ( distinct o1 . project ) as collaborations
from tmp as o1
join tmp as o2 on o1 . project = o2 . project
where o1 . organization < > o2 . organization and o1 . name < > o2 . name
group by o1 . name , o2 . name , o1 . organization , o2 . organization ; /* EOS */
DROP VIEW if exists tmp ; /* EOS */
create TEMPORARY VIEW tmp AS
select o . id organization , o . name , o . country , ro . project as project from $ { stats_db_name } . organization o
join $ { stats_db_name } . organization_projects ro on o . id = ro . id
and o . country < > ' UNKNOWN ' and o . name is not null ; /* EOS */
WITH tmp AS (
select o . id organization , o . name , ro . project as project
from $ { stats_db_name } . organization o
join $ { stats_db_name } . organization_projects ro on o . id = ro . id where o . name is not null )
select o1 . organization org1 , o1 . name orgname1 , o2 . organization org2 , o2 . name orgname2 , count ( distinct o1 . project ) as collaborations
from tmp as o1
join tmp as o2 on o1 . project = o2 . project
where o1 . organization < > o2 . organization and o1 . name < > o2 . name
group by o1 . name , o2 . name , o1 . organization , o2 . organization ; /* EOS */
drop table if exists $ { stats_db_name } . indi_project_collab_org_country purge ; /* EOS */
create table if not exists $ { stats_db_name } . indi_project_collab_org_country stored as parquet as
select o1 . organization org1 , o1 . name org1name , o2 . country country2 , count ( distinct o1 . project ) as collaborations
from tmp as o1
join tmp as o2 on o1 . project = o2 . project
where o1 . organization < > o2 . organization and o1 . country < > o2 . country
group by o1 . organization , o2 . country , o1 . name ; /* EOS */
DROP VIEW if exists tmp ; /* EOS */
WITH tmp AS (
select o . id organization , o . name , o . country , ro . project as project
from $ { stats_db_name } . organization o
join $ { stats_db_name } . organization_projects ro on o . id = ro . id and o . country < > ' UNKNOWN ' and o . name is not null )
select o1 . organization org1 , o1 . name org1name , o2 . country country2 , count ( distinct o1 . project ) as collaborations
from tmp as o1
join tmp as o2 on o1 . project = o2 . project
where o1 . organization < > o2 . organization and o1 . country < > o2 . country
group by o1 . organization , o2 . country , o1 . name ; /* EOS */
drop table if exists $ { stats_db_name } . indi_funder_country_collab purge ; /* EOS */
create table if not exists $ { stats_db_name } . indi_funder_country_collab stored as parquet as
with tmp as ( select funder , project , country from $ { stats_db_name } . organization_projects op
join $ { stats_db_name } . organization o on o . id = op . id
join $ { stats_db_name } . project p on p . id = op . project
where country < > ' UNKNOWN ' )
select f1 . funder , f1 . country as country1 , f2 . country as country2 , count ( distinct f1 . project ) as collaborations
from tmp as f1
join tmp as f2 on f1 . project = f2 . project
where f1 . country < > f2 . country
group by f1 . funder , f2 . country , f1 . country ; /* EOS */
create TEMPORARY VIEW tmp AS
select distinct country , ro . id as result from $ { stats_db_name } . organization o
join $ { stats_db_name } . result_organization ro on o . id = ro . organization
where country < > ' UNKNOWN ' and o . name is not null ; /* EOS */
select f1 . funder , f1 . country as country1 , f2 . country as country2 , count ( distinct f1 . project ) as collaborations
from tmp as f1
join tmp as f2 on f1 . project = f2 . project
where f1 . country < > f2 . country
group by f1 . funder , f2 . country , f1 . country ; /* EOS */
drop table if exists $ { stats_db_name } . indi_result_country_collab purge ; /* EOS */
create table if not exists $ { stats_db_name } . indi_result_country_collab stored as parquet as
select o1 . country country1 , o2 . country country2 , count ( o1 . result ) as collaborations
from tmp as o1
join tmp as o2 on o1 . result = o2 . result
where o1 . country < > o2 . country
group by o1 . country , o2 . country ; /* EOS */
WITH tmp AS (
select distinct country , ro . id as result from $ { stats_db_name } . organization o
join $ { stats_db_name } . result_organization ro on o . id = ro . organization
where country < > ' UNKNOWN ' and o . name is not null )
select o1 . country country1 , o2 . country country2 , count ( o1 . result ) as collaborations
from tmp as o1
join tmp as o2 on o1 . result = o2 . result
where o1 . country < > o2 . country
group by o1 . country , o2 . country ; /* EOS */
DROP VIEW if exists tmp ; /* EOS */
- - -- Sprint 4 ----
drop table if exists $ { stats_db_name } . indi_pub_diamond purge ; /* EOS */
- - create table if not exists $ { stats_db_name } . indi_pub_diamond stored as parquet as
- - select distinct pd . id , coalesce ( in_diamond_journal , 0 ) as in_diamond_journal
- - from $ { stats_db_name } . publication_datasources pd
-- left outer join (
-- select pd.id, 1 as in_diamond_journal from ${stats_db_name}.publication_datasources pd
-- join ${stats_db_name}.datasource d on d.id=pd.datasource
-- join STATS_EXT.plan_s_jn ps where (ps.issn_print=d.issn_printed and ps.issn_online=d.issn_online)
-- and (ps.journal_is_in_doaj=true or ps.journal_is_oa=true) and ps.has_apc=false) tmp
-- on pd.id=tmp.id;
create table if not exists $ { stats_db_name } . indi_pub_diamond stored as parquet as
select distinct pd . id , coalesce ( in_diamond_journal , 0 ) as in_diamond_journal
from $ { stats_db_name } . publication_datasources pd
left outer join ( select pd . id , 1 as in_diamond_journal from $ { stats_db_name } . publication_datasources pd
join $ { stats_db_name } . datasource d on d . id = pd . datasource
join STATS_EXT . plan_s_jn ps where ( ps . issn_print = d . issn_printed and ps . issn_online = d . issn_online )
and ( ps . journal_is_in_doaj = true or ps . journal_is_oa = true ) and ps . has_apc = false ) tmp
on pd . id = tmp . id ; /* EOS */
select distinct pd . id , coalesce ( in_diamond_journal , 0 ) as in_diamond_journal
from $ { stats_db_name } . publication_datasources pd
left outer join (
select pd . id , 1 as in_diamond_journal
from $ { stats_db_name } . publication_datasources pd
join $ { stats_db_name } . datasource d on d . id = pd . datasource
join STATS_EXT . plan_s_jn ps where ( ps . issn_print = d . issn_printed and ps . issn_online = d . issn_online )
and ( ps . journal_is_in_doaj = true or ps . journal_is_oa = true ) and ps . has_apc = false ) tmp on pd . id = tmp . id ; /* EOS */
drop table if exists $ { stats_db_name } . indi_pub_in_transformative purge ; /* EOS */
create table if not exists $ { stats_db_name } . indi_pub_in_transformative stored as parquet as
select distinct pd . id , coalesce ( is_transformative , 0 ) as is_transformative
from $ { stats_db_name } . publication pd
left outer join (
select pd . id , 1 as is_transformative from $ { stats_db_name } . publication_datasources pd
join $ { stats_db_name } . datasource d on d . id = pd . datasource
join STATS_EXT . plan_s_jn ps where ( ps . issn_print = d . issn_printed and ps . issn_online = d . issn_online )
and ps . is_transformative_journal = true ) tmp
on pd . id = tmp . id ; /* EOS */
select distinct pd . id , coalesce ( is_transformative , 0 ) as is_transformative
from $ { stats_db_name } . publication pd
left outer join (
select pd . id , 1 as is_transformative
from $ { stats_db_name } . publication_datasources pd
join $ { stats_db_name } . datasource d on d . id = pd . datasource
join STATS_EXT . plan_s_jn ps where ( ps . issn_print = d . issn_printed and ps . issn_online = d . issn_online )
and ps . is_transformative_journal = true ) tmp on pd . id = tmp . id ; /* EOS */
drop table if exists $ { stats_db_name } . indi_pub_closed_other_open purge ; /* EOS */
create table if not exists $ { stats_db_name } . indi_pub_closed_other_open stored as parquet as
select distinct ri . id , coalesce ( pub_closed_other_open , 0 ) as pub_closed_other_open from $ { stats_db_name } . result_instance ri
left outer join
( select ri . id , 1 as pub_closed_other_open from $ { stats_db_name } . result_instance ri
join $ { stats_db_name } . publication p on p . id = ri . id
join $ { stats_db_name } . datasource d on ri . hostedby = d . id
where d . type like ' %Journal% ' and ri . accessright = ' Closed Access ' and
( p . bestlicence = ' Open Access ' or p . bestlicence = ' Open Source ' ) ) tmp
on tmp . id = ri . id ; /* EOS */
select distinct ri . id , coalesce ( pub_closed_other_open , 0 ) as pub_closed_other_open
from $ { stats_db_name } . result_instance ri
left outer join (
select ri . id , 1 as pub_closed_other_open
from $ { stats_db_name } . result_instance ri
join $ { stats_db_name } . publication p on p . id = ri . id
join $ { stats_db_name } . datasource d on ri . hostedby = d . id
where d . type like ' %Journal% ' and ri . accessright = ' Closed Access ' and
( p . bestlicence = ' Open Access ' or p . bestlicence = ' Open Source ' ) ) tmp on tmp . id = ri . id ; /* EOS */
- - -- Sprint 5 ----
drop table if exists $ { stats_db_name } . indi_result_no_of_copies purge ; /* EOS */
create table if not exists $ { stats_db_name } . indi_result_no_of_copies stored as parquet as
select id , count ( id ) as number_of_copies from $ { stats_db_name } . result_instance group by id ; /* EOS */
select id , count ( id ) as number_of_copies
from $ { stats_db_name } . result_instance
group by id ; /* EOS */
- - -- Sprint 6 ----
drop table if exists $ { stats_db_name } . indi_pub_downloads purge ; /* EOS */
create table if not exists $ { stats_db_name } . indi_pub_downloads stored as parquet as
SELECT result_id , sum ( downloads ) no_downloads from openaire_prod_usage_stats . usage_stats
join $ { stats_db_name } . publication on result_id = id
where downloads > 0
GROUP BY result_id
order by no_downloads desc ; /* EOS */
- - ANALYZE TABLE $ { stats_db_name } . indi_pub_downloads COMPUTE STATISTICS ;
SELECT result_id , sum ( downloads ) no_downloads
from openaire_prod_usage_stats . usage_stats
join $ { stats_db_name } . publication on result_id = id
where downloads > 0
GROUP BY result_id ; /* EOS */
drop table if exists $ { stats_db_name } . indi_pub_downloads_datasource purge ; /* EOS */
create table if not exists $ { stats_db_name } . indi_pub_downloads_datasource stored as parquet as
SELECT result_id , repository_id , sum ( downloads ) no_downloads from openaire_prod_usage_stats . usage_stats
join $ { stats_db_name } . publication on result_id = id
where downloads > 0
GROUP BY result_id , repository_id
order by result _id; /* EOS */
SELECT result_id , repository_id , sum ( downloads ) no_downloads
from openaire_prod_usage_stats . usage_stats
join $ { stats_db_name } . publication on result_id = id
where downloads > 0
GROUP BY result_id , repository _id; /* EOS */
drop table if exists $ { stats_db_name } . indi_pub_downloads_year purge ; /* EOS */
create table if not exists $ { stats_db_name } . indi_pub_downloads_year stored as parquet as
SELECT result_id , cast ( substring ( us . ` date ` , 1 , 4 ) as int ) as ` year ` , sum ( downloads ) no_downloads
from openaire_prod_usage_stats . usage_stats us
join $ { stats_db_name } . publication on result_id = id where downloads > 0
GROUP BY result_id , substring ( us . ` date ` , 1 , 4 ) ; /* EOS */
SELECT result_id , cast ( substring ( us . ` date ` , 1 , 4 ) as int ) as ` year ` , sum ( downloads ) no_downloads
from openaire_prod_usage_stats . usage_stats us
join $ { stats_db_name } . publication on result_id = id where downloads > 0
GROUP BY result_id , substring ( us . ` date ` , 1 , 4 ) ; /* EOS */
drop table if exists $ { stats_db_name } . indi_pub_downloads_datasource_year purge ; /* EOS */
create table if not exists $ { stats_db_name } . indi_pub_downloads_datasource_year stored as parquet as
SELECT result_id , cast ( substring ( us . ` date ` , 1 , 4 ) as int ) as ` year ` , repository_id , sum ( downloads ) no_downloads from openaire_prod_usage_stats . usage_stats us
join $ { stats_db_name } . publication on result_id = id
where downloads > 0
GROUP BY result_id , repository_id , substring ( us . ` date ` , 1 , 4 ) ; /* EOS */
SELECT result_id , cast ( substring ( us . ` date ` , 1 , 4 ) as int ) as ` year ` , repository_id , sum ( downloads ) no_downloads
from openaire_prod_usage_stats . usage_stats us
join $ { stats_db_name } . publication on result_id = id
where downloads > 0
GROUP BY result_id , repository_id , substring ( us . ` date ` , 1 , 4 ) ; /* EOS */
- - -- Sprint 7 ----
drop table if exists $ { stats_db_name } . indi_pub_gold_oa purge ; /* EOS */
- - create table if not exists $ { stats_db_name } . indi_pub_gold_oa stored as parquet as
-- WITH gold_oa AS ( SELECT
-- issn_l,
-- journal_is_in_doaj,
-- journal_is_oa,
-- issn_1 as issn
-- FROM
-- STATS_EXT.oa_journals
-- WHERE
-- issn_1 != ""
-- UNION
-- ALL SELECT
-- issn_l,
-- journal_is_in_doaj,
-- journal_is_oa,
-- issn_2 as issn
-- FROM
-- STATS_EXT.oa_journals
-- WHERE
-- issn_2 != "" ), issn AS ( SELECT
-- *
-- FROM
- - ( SELECT
-- id,
-- issn_printed as issn
-- FROM
-- ${stats_db_name}.datasource
-- WHERE
-- issn_printed IS NOT NULL
-- UNION ALL
-- SELECT
-- id,
-- issn_online as issn
-- FROM
-- ${stats_db_name}.datasource
-- WHERE
-- issn_online IS NOT NULL or id like '%doajarticles%') as issn
-- WHERE
-- LENGTH(issn) > 7)
- - SELECT
-- DISTINCT pd.id, coalesce(is_gold, 0) as is_gold
- - FROM
-- ${stats_db_name}.publication_datasources pd
-- left outer join(
-- select pd.id, 1 as is_gold FROM ${stats_db_name}.publication_datasources pd
-- JOIN issn on issn.id=pd.datasource
-- JOIN gold_oa on issn.issn = gold_oa.issn) tmp
-- on pd.id=tmp.id;
- - create table if not exists $ { stats_db_name } . indi_pub_gold_oa stored as parquet as
- - with gold_oa as (
- - SELECT issn , issn_l from stats_ext . issn_gold_oa_dataset_v5 ) ,
- - issn AS ( SELECT * FROM
- - ( SELECT id , issn_printed as issn FROM $ { stats_db_name } . datasource
- - WHERE issn_printed IS NOT NULL
- - UNION ALL
- - SELECT id , issn_online as issn FROM $ { stats_db_name } . datasource
- - WHERE issn_online IS NOT NULL or id like ' %doajarticles% ' ) as issn
- - WHERE LENGTH ( issn ) > 7 ) ,
- - alljournals AS ( select issn , issn_l from stats_ext . alljournals
- - where journal_is_in_doaj = true or journal_is_oa = true )
- - SELECT DISTINCT pd . id , coalesce ( is_gold , 0 ) as is_gold
- - FROM $ { stats_db_name } . publication_datasources pd
- - left outer join (
- - select pd . id , 1 as is_gold FROM $ { stats_db_name } . publication_datasources pd
- - JOIN issn on issn . id = pd . datasource
- - JOIN gold_oa on issn . issn = gold_oa . issn
- - join alljournals on issn . issn = alljournals . issn
- - left outer join $ { stats_db_name } . result_instance ri on ri . id = pd . id
- - and ri . accessright ! = ' Closed Access ' and ri . accessright_uw = ' gold ' ) tmp
- - on pd . id = tmp . id ;
create table if not exists $ { stats_db_name } . indi_pub_gold_oa stored as parquet as
with gold_oa as (
select distinct issn from (
SELECT issn_l as issn from stats_ext . issn_gold_oa_dataset_v5
UNION ALL
SELECT issn as issn from stats_ext . issn_gold_oa_dataset_v5
UNION ALL
select issn from stats_ext . alljournals where journal_is_in_doaj = true or journal_is_oa = true
UNION ALL
select issn_l as issn from stats_ext . alljournals where journal_is_in_doaj = true or journal_is_oa = true ) foo ) ,
dd as (
select distinct * from (
select id , issn_printed as issn from $ { stats_db_name } . datasource d where d . id like ' %doajarticles% '
UNION ALL
select id , issn_online as issn from $ { stats_db_name } . datasource d where d . id like ' %doajarticles% '
UNION ALL
select id , issn_printed as issn from $ { stats_db_name } . datasource d join gold_oa on gold_oa . issn = d . issn_printed
UNION ALL
select id , issn_online as issn from $ { stats_db_name } . datasource d join gold_oa on gold_oa . issn = d . issn_online ) foo
)
SELECT DISTINCT pd . id , coalesce ( is_gold , 0 ) as is_gold
FROM $ { stats_db_name } . publication_datasources pd
left outer join (
select pd . id , 1 as is_gold
FROM $ { stats_db_name } . publication_datasources pd
join dd on dd . id = pd . datasource
left outer join $ { stats_db_name } . result_accessroute ra on ra . id = pd . id where ra . accessroute = ' gold ' ) tmp on tmp . id = pd . id ; /* EOS */
with gold_oa as (
select distinct issn from (
SELECT issn_l as issn from stats_ext . issn_gold_oa_dataset_v5
UNION ALL
SELECT issn as issn from stats_ext . issn_gold_oa_dataset_v5
UNION ALL
select issn from stats_ext . alljournals where journal_is_in_doaj = true or journal_is_oa = true
UNION ALL
select issn_l as issn from stats_ext . alljournals where journal_is_in_doaj = true or journal_is_oa = true ) foo ) ,
dd as (
select distinct * from (
select id , issn_printed as issn from $ { stats_db_name } . datasource d where d . id like ' %doajarticles% '
UNION ALL
select id , issn_online as issn from $ { stats_db_name } . datasource d where d . id like ' %doajarticles% '
UNION ALL
select id , issn_printed as issn from $ { stats_db_name } . datasource d join gold_oa on gold_oa . issn = d . issn_printed
UNION ALL
select id , issn_online as issn from $ { stats_db_name } . datasource d join gold_oa on gold_oa . issn = d . issn_online ) foo
)
SELECT DISTINCT pd . id , coalesce ( is_gold , 0 ) as is_gold
FROM $ { stats_db_name } . publication_datasources pd
left outer join (
select pd . id , 1 as is_gold
FROM $ { stats_db_name } . publication_datasources pd
join dd on dd . id = pd . datasource
left outer join $ { stats_db_name } . result_accessroute ra on ra . id = pd . id where ra . accessroute = ' gold ' ) tmp on tmp . id = pd . id ; /* EOS */
drop table if exists $ { stats_db_name } . indi_pub_hybrid_oa_with_cc purge ; /* EOS */
create table if not exists $ { stats_db_name } . indi_pub_hybrid_oa_with_cc stored as parquet as
WITH hybrid_oa AS (
SELECT issn_l , journal_is_in_doaj , journal_is_oa , issn_print as issn
@ -395,121 +260,57 @@ create table if not exists ${stats_db_name}.indi_pub_hybrid_oa_with_cc stored as
FROM STATS_EXT . plan_s_jn
WHERE issn_online ! = " " and ( journal_is_in_doaj = FALSE OR journal_is_oa = FALSE ) ) ,
issn AS (
SELECT *
FROM (
SELECT id , issn_printed as issn
FROM $ { stats_db_name } . datasource
WHERE issn_printed IS NOT NULL
UNION ALL
SELECT id , issn_online as issn
FROM $ { stats_db_name } . datasource
WHERE issn_online IS NOT NULL ) as issn
WHERE LENGTH ( issn ) > 7 )
SELECT DISTINCT pd . id , coalesce ( is_hybrid_oa , 0 ) as is_hybrid_oa
FROM $ { stats_db_name } . publication_datasources pd
LEFT OUTER JOIN (
SELECT pd . id , 1 as is_hybrid_oa from $ { stats_db_name } . publication_datasources pd
JOIN $ { stats_db_name } . datasource d on d . id = pd . datasource
JOIN issn on issn . id = pd . datasource
JOIN hybrid_oa ON issn . issn = hybrid_oa . issn
JOIN $ { stats_db_name } . indi_result_has_cc_licence cc on pd . id = cc . id
JOIN $ { stats_db_name } . indi_pub_gold_oa ga on pd . id = ga . id
where cc . has_cc_license = 1 and ga . is_gold = 0 ) tmp on pd . id = tmp . id ; /* EOS */
SELECT *
FROM (
SELECT id , issn_printed as issn
FROM $ { stats_db_name } . datasource
WHERE issn_printed IS NOT NULL
UNION ALL
SELECT id , issn_online as issn
FROM $ { stats_db_name } . datasource
WHERE issn_online IS NOT NULL ) as issn
WHERE LENGTH ( issn ) > 7 )
SELECT DISTINCT pd . id , coalesce ( is_hybrid_oa , 0 ) as is_hybrid_oa
FROM $ { stats_db_name } . publication_datasources pd
LEFT OUTER JOIN (
SELECT pd . id , 1 as is_hybrid_oa from $ { stats_db_name } . publication_datasources pd
JOIN $ { stats_db_name } . datasource d on d . id = pd . datasource
JOIN issn on issn . id = pd . datasource
JOIN hybrid_oa ON issn . issn = hybrid_oa . issn
JOIN $ { stats_db_name } . indi_result_has_cc_licence cc on pd . id = cc . id
JOIN $ { stats_db_name } . indi_pub_gold_oa ga on pd . id = ga . id where cc . has_cc_license = 1 and ga . is_gold = 0 ) tmp on pd . id = tmp . id ; /* EOS */
drop table if exists $ { stats_db_name } . indi_pub_hybrid purge ; /* EOS */
- - create table if not exists $ { stats_db_name } . indi_pub_hybrid stored as parquet as
-- WITH gold_oa AS ( SELECT
-- issn_l,
-- journal_is_in_doaj,
-- journal_is_oa,
-- issn_1 as issn,
-- has_apc
-- FROM
-- STATS_EXT.oa_journals
-- WHERE
-- issn_1 != ""
-- UNION
-- ALL SELECT
-- issn_l,
-- journal_is_in_doaj,
-- journal_is_oa,
-- issn_2 as issn,
-- has_apc
-- FROM
-- STATS_EXT.oa_journals
-- WHERE
-- issn_2 != "" ), issn AS ( SELECT
-- *
-- FROM
- - ( SELECT
-- id,
-- issn_printed as issn
-- FROM
-- ${stats_db_name}.datasource
-- WHERE
-- issn_printed IS NOT NULL
-- UNION ALL
-- SELECT
-- id,
-- issn_online as issn
-- FROM
-- ${stats_db_name}.datasource
-- WHERE
-- issn_online IS NOT NULL or id like '%doajarticles%') as issn
-- WHERE
-- LENGTH(issn) > 7)
- - select distinct pd . id , coalesce ( is_hybrid , 0 ) as is_hybrid
- - from $ { stats_db_name } . publication_datasources pd
-- left outer join (
-- select pd.id, 1 as is_hybrid from ${stats_db_name}.publication_datasources pd
-- join ${stats_db_name}.datasource d on d.id=pd.datasource
-- join issn on issn.id=pd.datasource
-- join gold_oa on issn.issn=gold_oa.issn
-- where (gold_oa.journal_is_in_doaj=false or gold_oa.journal_is_oa=false))tmp
-- on pd.id=tmp.id;
- - create table if not exists $ { stats_db_name } . indi_pub_hybrid stored as parquet as
- - select distinct pd . id , coalesce ( is_hybrid , 0 ) is_hybrid from $ { stats_db_name } . publication_datasources pd
- - left outer join ( select pd . id , 1 as is_hybrid from $ { stats_db_name } . publication_datasources pd
- - join $ { stats_db_name } . datasource d on pd . datasource = d . id
- - join $ { stats_db_name } . result_instance ri on ri . id = pd . id
- - join $ { stats_db_name } . indi_pub_gold_oa indi_gold on indi_gold . id = pd . id
- - join $ { stats_db_name } . result_accessroute ra on ra . id = pd . id
- - where d . type like ' %Journal% ' and ri . accessright ! = ' Closed Access ' and ( ri . accessright_uw ! = ' gold '
- - or indi_gold . is_gold = 0 ) and ( ra . accessroute = ' hybrid ' or ri . license is not null ) ) tmp
- - on pd . id = tmp . id ;
create table if not exists $ { stats_db_name } . indi_pub_hybrid stored as parquet as
select distinct pd . id , coalesce ( is_hybrid , 0 ) is_hybrid from $ { stats_db_name } . publication pd
left outer join ( select pd . id , 1 as is_hybrid from $ { stats_db_name } . publication pd
join $ { stats_db_name } . result_instance ri on ri . id = pd . id
join $ { stats_db_name } . indi_pub_gold_oa indi_gold on indi_gold . id = pd . id
join $ { stats_db_name } . result_accessroute ra on ra . id = pd . id
join $ { stats_db_name } . datasource d on d . id = ri . hostedby
where indi_gold . is_gold = 0 and ( ( d . type like ' %Journal% ' and ri . accessright ! = ' Closed Access ' and ri . accessright ! = ' Restricted ' and ri . license is not null ) or
ra . accessroute = ' hybrid ' ) ) tmp
on pd . id = tmp . id ; /* EOS */
left outer join (
select pd . id , 1 as is_hybrid from $ { stats_db_name } . publication pd
join $ { stats_db_name } . result_instance ri on ri . id = pd . id
join $ { stats_db_name } . indi_pub_gold_oa indi_gold on indi_gold . id = pd . id
join $ { stats_db_name } . result_accessroute ra on ra . id = pd . id
join $ { stats_db_name } . datasource d on d . id = ri . hostedby
where indi_gold . is_gold = 0 and ( ( d . type like ' %Journal% ' and ri . accessright ! = ' Closed Access ' and ri . accessright ! = ' Restricted ' and ri . license is not null ) or ra . accessroute = ' hybrid ' ) ) tmp on pd . id = tmp . id ; /* EOS */
drop table if exists $ { stats_db_name } . indi_org_fairness purge ; /* EOS */
create table if not exists $ { stats_db_name } . indi_org_fairness stored as parquet as
- - return results with PIDs , and rich metadata group by organization
with result_fair as
( select ro . organization organization , count ( distinct ro . id ) no_result_fair from $ { stats_db_name } . result_organization ro
join $ { stats_db_name } . result r on r . id = ro . id
with result_fair as (
select ro . organization organization , count ( distinct ro . id ) no_result_fair
from $ { stats_db_name } . result_organization ro
join $ { stats_db_name } . result r on r . id = ro . id
- - join result_pids rp on r . id = rp . id
where ( title is not null ) and ( publisher is not null ) and ( abstract = true ) and ( year is not null ) and ( authors > 0 ) and cast ( year as int ) > 2003
group by ro . organization ) ,
where ( title is not null ) and ( publisher is not null ) and ( abstract = true ) and ( year is not null ) and ( authors > 0 ) and cast ( year as int ) > 2003
group by ro . organization ) ,
- - return all results group by organization
allresults as ( select ro . organization , count ( distinct ro . id ) no_allresults from $ { stats_db_name } . result_organization ro
join $ { stats_db_name } . result r on r . id = ro . id
where cast ( year as int ) > 2003
group by ro . organization )
allresults as (
select ro . organization , count ( distinct ro . id ) no_allresults from $ { stats_db_name } . result_organization ro
join $ { stats_db_name } . result r on r . id = ro . id
where cast ( year as int ) > 2003
group by ro . organization )
- - return results_fair / all_results
select allresults . organization , result_fair . no_result_fair / allresults . no_allresults org_fairness
from allresults
join result_fair on result_fair . organization = allresults . organization ; /* EOS */
select allresults . organization , result_fair . no_result_fair / allresults . no_allresults org_fairness
from allresults
join result_fair on result_fair . organization = allresults . organization ; /* EOS */
CREATE TEMPORARY VIEW result_fair as
select ro . organization organization , count ( distinct ro . id ) no_result_fair
@ -534,8 +335,8 @@ select ar.organization, rf.no_result_fair/ar.no_allresults org_fairness
from allresults ar
join result_fair rf on rf . organization = ar . organization ; /* EOS */
DROP VIEW result_fair ; /* EOS */
DROP VIEW allresults ; /* EOS */
DROP VIEW result_fair ;
DROP VIEW allresults ;
CREATE TEMPORARY VIEW result_fair as
select year , ro . organization organization , count ( distinct ro . id ) no_result_fair from $ { stats_db_name } . result_organization ro
@ -859,19 +660,6 @@ drop view pub_fos_totals; /*EOS*/
drop table if exists $ { stats_db_name } . indi_pub_bronze_oa purge ; /* EOS */
- - create table if not exists $ { stats_db_name } . indi_pub_bronze_oa stored as parquet as
- - select distinct p . id , coalesce ( is_bronze_oa , 0 ) as is_bronze_oa
- - from $ { stats_db_name } . publication p
- - left outer join
- - ( select p . id , 1 as is_bronze_oa from $ { stats_db_name } . publication p
- - join $ { stats_db_name } . indi_result_has_cc_licence cc on cc . id = p . id
- - join $ { stats_db_name } . indi_pub_gold_oa ga on ga . id = p . id
- - join $ { stats_db_name } . result_instance ri on ri . id = p . id
- - join $ { stats_db_name } . datasource d on d . id = ri . hostedby
- - where cc . has_cc_license = 0 and ga . is_gold = 0
- - and ( d . type = ' Journal ' or d . type = ' Journal Aggregator/Publisher ' )
- - and ri . accessright = ' Open Access ' ) tmp on tmp . id = p . id ;
create table $ { stats_db_name } . indi_pub_bronze_oa stored as parquet as
select distinct pd . id , coalesce ( is_bronze_oa , 0 ) is_bronze_oa from $ { stats_db_name } . publication pd
left outer join ( select pd . id , 1 as is_bronze_oa from $ { stats_db_name } . publication pd
@ -1189,7 +977,7 @@ union all
select pf . name from stats_ext . insitutions_for_publicly_funded pf
join $ { stats_db_name } . project p on p . funder = pf . name
union all
select pf . name from stats_ext . insitutions_for_publicly_funded pf
select o p. name from stats_ext . insitutions_for_publicly_funded pf
join org_names_pids op on ( op . name = pf . name or op . pid = pf . ror )
and pf . publicly_funded = ' yes ' ) foo )
select distinct p . id , coalesce ( publicly_funded , 0 ) as publicly_funded
@ -1199,3 +987,53 @@ select distinct ro.id, 1 as publicly_funded from ${stats_db_name}.result_organiz
join $ { stats_db_name } . organization o on o . id = ro . organization
join publicly_funded_orgs pfo on o . name = pfo . name ) tmp on p . id = tmp . id ; /* EOS */
drop table if exists $ { stats_db_name } . indi_pub_green_with_license purge ; /* EOS */
create table $ { stats_db_name } . indi_pub_green_with_license stored as parquet as
select distinct p . id , coalesce ( green_with_license , 0 ) as green_with_license
from $ { stats_db_name } . publication p
left outer join (
select distinct p . id , 1 as green_with_license from $ { stats_db_name } . publication p
join $ { stats_db_name } . result_instance ri on ri . id = p . id
join $ { stats_db_name } . datasource on datasource . id = ri . hostedby
where ri . license is not null and datasource . type like ' %Repository% ' and datasource . name ! = ' Other ' ) tmp on p . id = tmp . id ; /* EOS */
drop table if exists $ { stats_db_name } . result_country purge ; /* EOS */
create table $ { stats_db_name } . result_country stored as parquet as
select distinct ro . id , coalesce ( o . country , f . country )
from $ { stats_db_name } . result_organization ro
left outer join $ { stats_db_name } . organization o on o . id = ro . organization
left outer join $ { stats_db_name } . result_projects rp on rp . id = ro . id
left outer join $ { stats_db_name } . project p on p . id = rp . project
left outer join $ { stats_db_name } . funder f on f . name = p . funder
where coalesce ( o . country , f . country ) IS NOT NULL ;
drop table if exists $ { stats_db_name } . indi_result_oa_with_license purge ; /* EOS */
create table $ { stats_db_name } . indi_result_oa_with_license stored as parquet as
select distinct r . id , coalesce ( oa_with_license , 0 ) as oa_with_license
from $ { stats_db_name } . result r
left outer join ( select distinct r . id , 1 as oa_with_license from $ { stats_db_name } . result r
join $ { stats_db_name } . result_licenses rl on rl . id = r . id where r . bestlicence = ' Open Access ' ) tmp on r . id = tmp . id ; /* EOS */
drop table if exists $ { stats_db_name } . indi_result_oa_without_license purge ; /* EOS */
create table $ { stats_db_name } . indi_result_oa_without_license stored as parquet as
with without_license as
( select distinct id from $ { stats_db_name } . indi_result_oa_with_license
where oa_with_license = 0 )
select distinct r . id , coalesce ( oa_without_license , 0 ) as oa_without_license
from $ { stats_db_name } . result r
left outer join ( select distinct r . id , 1 as oa_without_license
from $ { stats_db_name } . result r
join without_license wl on wl . id = r . id
where r . bestlicence = ' Open Access ' ) tmp on r . id = tmp . id ; /* EOS */
drop table if exists $ { stats_db_name } . indi_result_under_transformative purge ; /* EOS */
create table $ { stats_db_name } . indi_result_under_transformative stored as parquet as
with transformative_dois as (
select distinct doi from stats_ext . transformative_facts )
select distinct r . id , coalesce ( under_transformative , 0 ) as under_transformative
from $ { stats_db_name } . result r
left outer join (
select distinct rp . id , 1 as under_transformative
from $ { stats_db_name } . result_pids rp join $ { stats_db_name } . result r on r . id = rp . id
join transformative_dois td on td . doi = rp . pid ) tmp on r . id = tmp . id ; /* EOS */