2022-08-02 12:39:34 +02:00
-- Sprint 1 ----
2024-01-26 01:04:48 +01:00
-- indi_pub_green_oa: one row per publication.
-- green_oa = 1 when at least one instance of the publication is hosted by a
-- datasource whose type contains 'Repository' and whose name is not 'Other',
-- and that instance has access right Open Access, Embargo or Open Source.
-- green_oa = 0 for every other publication.
DROP TABLE IF EXISTS ${stats_db_name}.indi_pub_green_oa PURGE; /* EOS */
CREATE TABLE IF NOT EXISTS ${stats_db_name}.indi_pub_green_oa STORED AS PARQUET AS
SELECT DISTINCT
    p.id,
    COALESCE(tmp.green_oa, 0) AS green_oa
FROM ${stats_db_name}.publication p
LEFT OUTER JOIN (
    SELECT
        p.id,
        1 AS green_oa
    FROM ${stats_db_name}.publication p
    JOIN ${stats_db_name}.result_instance ri ON ri.id = p.id
    JOIN ${stats_db_name}.datasource ON datasource.id = ri.hostedby
    WHERE datasource.type LIKE '%Repository%'
      AND ri.accessright IN ('Open Access', 'Embargo', 'Open Source')
      AND datasource.name != 'Other'
) tmp ON p.id = tmp.id; /* EOS */
2021-06-29 15:31:51 +02:00
2024-01-26 01:04:48 +01:00
-- indi_pub_grey_lit: one row per publication.
-- grey_lit = 1 when the publication has a classification outside the listed
-- types and has no 'Other literature type' classification at all.
-- grey_lit = 0 otherwise.
DROP TABLE IF EXISTS ${stats_db_name}.indi_pub_grey_lit PURGE; /* EOS */
CREATE TABLE IF NOT EXISTS ${stats_db_name}.indi_pub_grey_lit STORED AS PARQUET AS
SELECT DISTINCT
    p.id,
    COALESCE(tmp.grey_lit, 0) AS grey_lit
FROM ${stats_db_name}.publication p
LEFT OUTER JOIN (
    SELECT
        p.id,
        1 AS grey_lit
    FROM ${stats_db_name}.publication p
    JOIN ${stats_db_name}.result_classifications rt ON rt.id = p.id
    WHERE rt.type NOT IN (
              'Article',
              'Part of book or chapter of book',
              'Book',
              'Doctoral thesis',
              'Master thesis',
              'Data Paper',
              'Thesis',
              'Bachelor thesis',
              'Conference object'
          )
      -- the literal list above contains no NULL, so NOT IN is safe here
      AND NOT EXISTS (
              SELECT 1
              FROM ${stats_db_name}.result_classifications rc
              WHERE rc.type = 'Other literature type'
                AND rc.id = p.id
          )
) tmp ON p.id = tmp.id; /* EOS */
2021-06-29 15:31:51 +02:00
2024-01-26 01:04:48 +01:00
-- indi_pub_doi_from_crossref: one row per publication.
-- doi_from_crossref = 1 when an instance with pid type 'Digital Object Identifier'
-- was collected from the datasource named 'Crossref', 0 otherwise.
DROP TABLE IF EXISTS ${stats_db_name}.indi_pub_doi_from_crossref PURGE; /* EOS */
CREATE TABLE IF NOT EXISTS ${stats_db_name}.indi_pub_doi_from_crossref STORED AS PARQUET AS
SELECT DISTINCT
    p.id,
    COALESCE(tmp.doi_from_crossref, 0) AS doi_from_crossref
FROM ${stats_db_name}.publication p
LEFT OUTER JOIN (
    SELECT
        ri.id,
        1 AS doi_from_crossref
    FROM ${stats_db_name}.result_instance ri
    JOIN ${stats_db_name}.datasource d ON d.id = ri.collectedfrom
    -- NOTE(review): pidtype is left unqualified as in the original, presumably a result_instance column
    WHERE pidtype = 'Digital Object Identifier'
      AND d.name = 'Crossref'
) tmp ON tmp.id = p.id; /* EOS */
2021-06-29 15:31:51 +02:00
2022-08-02 12:39:34 +02:00
-- Sprint 2 ----
2024-01-26 01:04:48 +01:00
-- indi_result_has_cc_licence: one row per result and distinct flag value.
-- has_cc_license = 1 when the result has a licence whose lower-cased text
-- contains 'creativecommons.org' or 'cc ' (note the trailing blank), 0 otherwise.
DROP TABLE IF EXISTS ${stats_db_name}.indi_result_has_cc_licence PURGE; /* EOS */
CREATE TABLE IF NOT EXISTS ${stats_db_name}.indi_result_has_cc_licence STORED AS PARQUET AS
SELECT DISTINCT
    r.id,
    CASE
        WHEN tmp.lic = '' OR tmp.lic IS NULL THEN 0
        ELSE 1
    END AS has_cc_license
FROM ${stats_db_name}.result r
LEFT OUTER JOIN (
    SELECT
        r.id,
        license.type AS lic
    FROM ${stats_db_name}.result r
    JOIN ${stats_db_name}.result_licenses AS license ON license.id = r.id
    WHERE LOWER(license.type) LIKE '%creativecommons.org%'
       OR LOWER(license.type) LIKE '%cc %'
) tmp ON r.id = tmp.id; /* EOS */
2021-07-24 15:40:28 +02:00
2024-01-26 01:04:48 +01:00
-- indi_result_has_cc_licence_url: one row per result and distinct flag value.
-- has_cc_license_url = 1 when the result has a licence whose URL host is
-- exactly creativecommons.org (case-insensitive), 0 otherwise.
-- parse_url only recognises the part name 'HOST' without surrounding blanks.
DROP TABLE IF EXISTS ${stats_db_name}.indi_result_has_cc_licence_url PURGE; /* EOS */
CREATE TABLE IF NOT EXISTS ${stats_db_name}.indi_result_has_cc_licence_url STORED AS PARQUET AS
SELECT DISTINCT
    r.id,
    CASE
        WHEN tmp.lic_host = '' OR tmp.lic_host IS NULL THEN 0
        ELSE 1
    END AS has_cc_license_url
FROM ${stats_db_name}.result r
LEFT OUTER JOIN (
    SELECT
        r.id,
        LOWER(PARSE_URL(license.type, 'HOST')) AS lic_host
    FROM ${stats_db_name}.result r
    JOIN ${stats_db_name}.result_licenses AS license ON license.id = r.id
    WHERE LOWER(PARSE_URL(license.type, 'HOST')) = 'creativecommons.org'
) tmp ON r.id = tmp.id; /* EOS */
2021-07-24 15:40:28 +02:00
2024-01-26 01:04:48 +01:00
-- indi_pub_has_abstract: one row per publication with the boolean abstract
-- flag exposed as an integer (1 = has abstract, 0 = does not).
-- A NULL abstract flag is reported as 0. The fairness indicators in this file
-- test abstract = true, which also excludes NULL, so the two stay consistent.
DROP TABLE IF EXISTS ${stats_db_name}.indi_pub_has_abstract PURGE; /* EOS */
CREATE TABLE IF NOT EXISTS ${stats_db_name}.indi_pub_has_abstract STORED AS PARQUET AS
SELECT DISTINCT
    publication.id,
    CAST(COALESCE(abstract, FALSE) AS INT) AS has_abstract
FROM ${stats_db_name}.publication; /* EOS */
2021-10-01 15:02:02 +02:00
2024-01-26 01:04:48 +01:00
-- indi_result_with_orcid: one row per result.
-- has_orcid = 1 when the result id appears in result_orcid, 0 otherwise.
DROP TABLE IF EXISTS ${stats_db_name}.indi_result_with_orcid PURGE; /* EOS */
CREATE TABLE IF NOT EXISTS ${stats_db_name}.indi_result_with_orcid STORED AS PARQUET AS
SELECT DISTINCT
    r.id,
    COALESCE(flagged.has_orcid, 0) AS has_orcid
FROM ${stats_db_name}.result r
LEFT OUTER JOIN (
    SELECT
        id,
        1 AS has_orcid
    FROM ${stats_db_name}.result_orcid
) flagged ON r.id = flagged.id; /* EOS */
2021-12-20 18:23:57 +01:00
2023-10-09 13:00:50 +02:00
2024-03-22 09:16:49 +01:00
---- Sprint 3 ----
2024-01-26 01:04:48 +01:00
-- indi_funded_result_with_fundref: one row per result linked to a project.
-- fundref = 1 when at least one of its project links has provenance 'Harvested',
-- 0 otherwise.
DROP TABLE IF EXISTS ${stats_db_name}.indi_funded_result_with_fundref PURGE; /* EOS */
CREATE TABLE IF NOT EXISTS ${stats_db_name}.indi_funded_result_with_fundref STORED AS PARQUET AS
SELECT DISTINCT
    r.result AS id,
    COALESCE(tmp.fundref, 0) AS fundref
FROM ${stats_db_name}.project_results r
LEFT OUTER JOIN (
    SELECT DISTINCT
        result,
        1 AS fundref
    FROM ${stats_db_name}.project_results
    WHERE provenance = 'Harvested'
) tmp ON r.result = tmp.result; /* EOS */
2023-01-04 10:39:01 +01:00
2024-01-26 01:04:48 +01:00
-- indi_result_org_collab: for every ordered pair of different, named
-- organizations that are linked to the same result, the number of shared
-- result links (collaborations).
DROP TABLE IF EXISTS ${stats_db_name}.indi_result_org_collab PURGE; /* EOS */
CREATE TABLE IF NOT EXISTS ${stats_db_name}.indi_result_org_collab STORED AS PARQUET AS
WITH tmp AS (
    -- result/organization links restricted to organizations that have a name
    SELECT
        ro.organization AS organization,
        ro.id,
        o.name
    FROM ${stats_db_name}.result_organization ro
    JOIN ${stats_db_name}.organization o ON o.id = ro.organization
    WHERE o.name IS NOT NULL
)
SELECT
    o1.organization AS org1,
    o1.name AS org1name1,
    o2.organization AS org2,
    o2.name AS org2name2,
    COUNT(o1.id) AS collaborations
FROM tmp AS o1
-- the pairing key lives in ON, so this is an explicit equi-join rather than
-- a cross join filtered in WHERE
JOIN tmp AS o2 ON o1.id = o2.id
WHERE o1.organization != o2.organization
  AND o1.name != o2.name
GROUP BY o1.organization, o2.organization, o1.name, o2.name; /* EOS */
2023-01-04 10:39:01 +01:00
2024-01-26 01:04:48 +01:00
-- indi_result_org_country_collab: for every named organization with a known
-- country, the number of distinct results it shares with organizations from
-- each other country.
-- The grouping key matches the selected columns (organization, name, partner
-- country). Grouping additionally by the result id would emit one row per
-- result instead of one aggregated row per organization and country.
DROP TABLE IF EXISTS ${stats_db_name}.indi_result_org_country_collab PURGE; /* EOS */
CREATE TABLE IF NOT EXISTS ${stats_db_name}.indi_result_org_country_collab STORED AS PARQUET AS
WITH tmp AS (
    SELECT DISTINCT
        ro.organization AS organization,
        ro.id,
        o.name,
        o.country
    FROM ${stats_db_name}.result_organization ro
    JOIN ${stats_db_name}.organization o ON o.id = ro.organization
    WHERE o.country <> 'UNKNOWN'
      AND o.name IS NOT NULL
)
SELECT
    o1.organization AS org1,
    o1.name AS org1name1,
    o2.country AS country2,
    -- distinct, so a result shared with several organizations of the same
    -- country counts once, like the project-based indicators in this file
    COUNT(DISTINCT o1.id) AS collaborations
FROM tmp AS o1
JOIN tmp AS o2 ON o1.id = o2.id
WHERE o1.country != o2.country
GROUP BY o1.organization, o1.name, o2.country; /* EOS */
2023-05-25 13:52:34 +02:00
2024-01-26 01:04:48 +01:00
-- indi_project_collab_org: for every ordered pair of different, named
-- organizations taking part in the same project, the number of distinct
-- projects they share.
DROP TABLE IF EXISTS ${stats_db_name}.indi_project_collab_org PURGE; /* EOS */
CREATE TABLE IF NOT EXISTS ${stats_db_name}.indi_project_collab_org STORED AS PARQUET AS
WITH org_project AS (
    -- organization/project links restricted to organizations that have a name
    SELECT
        o.id AS organization,
        o.name,
        ro.project AS project
    FROM ${stats_db_name}.organization o
    JOIN ${stats_db_name}.organization_projects ro ON o.id = ro.id
    WHERE o.name IS NOT NULL
)
SELECT
    lhs.organization AS org1,
    lhs.name AS orgname1,
    rhs.organization AS org2,
    rhs.name AS orgname2,
    COUNT(DISTINCT lhs.project) AS collaborations
FROM org_project AS lhs
JOIN org_project AS rhs ON lhs.project = rhs.project
WHERE lhs.organization <> rhs.organization
  AND lhs.name <> rhs.name
GROUP BY lhs.name, rhs.name, lhs.organization, rhs.organization; /* EOS */
2023-01-04 10:39:01 +01:00
2024-01-26 01:04:48 +01:00
-- indi_project_collab_org_country: for every named organization with a known
-- country, the number of distinct projects it shares with organizations from
-- each other country.
DROP TABLE IF EXISTS ${stats_db_name}.indi_project_collab_org_country PURGE; /* EOS */
CREATE TABLE IF NOT EXISTS ${stats_db_name}.indi_project_collab_org_country STORED AS PARQUET AS
WITH tmp AS (
    SELECT
        o.id AS organization,
        o.name,
        o.country,
        ro.project AS project
    FROM ${stats_db_name}.organization o
    JOIN ${stats_db_name}.organization_projects ro ON o.id = ro.id
    -- row filters live in WHERE, which is equivalent for an inner join
    WHERE o.country <> 'UNKNOWN'
      AND o.name IS NOT NULL
)
SELECT
    o1.organization AS org1,
    o1.name AS org1name,
    o2.country AS country2,
    COUNT(DISTINCT o1.project) AS collaborations
FROM tmp AS o1
JOIN tmp AS o2 ON o1.project = o2.project
WHERE o1.organization <> o2.organization
  AND o1.country <> o2.country
GROUP BY o1.organization, o2.country, o1.name; /* EOS */
2023-01-04 10:39:01 +01:00
2024-01-26 01:04:48 +01:00
-- indi_funder_country_collab: per funder and ordered pair of different
-- countries, the number of distinct projects in which organizations from
-- both countries take part. Organizations with country 'UNKNOWN' are ignored.
DROP TABLE IF EXISTS ${stats_db_name}.indi_funder_country_collab PURGE; /* EOS */
CREATE TABLE IF NOT EXISTS ${stats_db_name}.indi_funder_country_collab STORED AS PARQUET AS
WITH tmp AS (
    -- NOTE(review): funder, project and country are left unqualified as in the
    -- original, so confirm which of the three joined tables supplies each one
    SELECT
        funder,
        project,
        country
    FROM ${stats_db_name}.organization_projects op
    JOIN ${stats_db_name}.organization o ON o.id = op.id
    JOIN ${stats_db_name}.project p ON p.id = op.project
    WHERE country <> 'UNKNOWN'
)
SELECT
    f1.funder,
    f1.country AS country1,
    f2.country AS country2,
    COUNT(DISTINCT f1.project) AS collaborations
FROM tmp AS f1
JOIN tmp AS f2 ON f1.project = f2.project
WHERE f1.country <> f2.country
GROUP BY f1.funder, f2.country, f1.country; /* EOS */
2023-01-04 10:39:01 +01:00
2024-01-26 01:04:48 +01:00
-- indi_result_country_collab: per ordered pair of different countries, the
-- number of results linked to named organizations of both countries.
-- Organizations with country 'UNKNOWN' are ignored.
DROP TABLE IF EXISTS ${stats_db_name}.indi_result_country_collab PURGE; /* EOS */
CREATE TABLE IF NOT EXISTS ${stats_db_name}.indi_result_country_collab STORED AS PARQUET AS
WITH tmp AS (
    -- DISTINCT leaves one row per (country, result), so the plain COUNT below
    -- counts each shared result once per country pair
    SELECT DISTINCT
        country,
        ro.id AS result
    FROM ${stats_db_name}.organization o
    JOIN ${stats_db_name}.result_organization ro ON o.id = ro.organization
    WHERE country <> 'UNKNOWN'
      AND o.name IS NOT NULL
)
SELECT
    o1.country AS country1,
    o2.country AS country2,
    COUNT(o1.result) AS collaborations
FROM tmp AS o1
JOIN tmp AS o2 ON o1.result = o2.result
WHERE o1.country <> o2.country
GROUP BY o1.country, o2.country; /* EOS */
2022-09-22 12:33:07 +02:00
2023-01-04 10:39:01 +01:00
2021-12-20 18:23:57 +01:00
---- Sprint 4 ----
2024-01-26 01:04:48 +01:00
-- indi_pub_diamond: one row per id found in publication_datasources.
-- in_diamond_journal = 1 when one of its datasources matches a plan_s_jn
-- journal on both print and online ISSN, that journal is in DOAJ or is OA,
-- and it has no APC. in_diamond_journal = 0 otherwise.
DROP TABLE IF EXISTS ${stats_db_name}.indi_pub_diamond PURGE; /* EOS */
CREATE TABLE IF NOT EXISTS ${stats_db_name}.indi_pub_diamond STORED AS PARQUET AS
SELECT DISTINCT
    pd.id,
    COALESCE(tmp.in_diamond_journal, 0) AS in_diamond_journal
FROM ${stats_db_name}.publication_datasources pd
LEFT OUTER JOIN (
    SELECT
        pd.id,
        1 AS in_diamond_journal
    FROM ${stats_db_name}.publication_datasources pd
    JOIN ${stats_db_name}.datasource d ON d.id = pd.datasource
    -- the ISSN match is the join key, so it is stated in ON instead of
    -- relying on a WHERE filter over an unconditioned join
    JOIN STATS_EXT.plan_s_jn ps
        ON ps.issn_print = d.issn_printed
       AND ps.issn_online = d.issn_online
    WHERE (ps.journal_is_in_doaj = TRUE OR ps.journal_is_oa = TRUE)
      AND ps.has_apc = FALSE
) tmp ON pd.id = tmp.id; /* EOS */
2021-11-26 14:22:04 +01:00
2024-01-26 01:04:48 +01:00
-- indi_pub_in_transformative: one row per publication.
-- is_transformative = 1 when one of its datasources matches a plan_s_jn
-- journal on both print and online ISSN and that journal is flagged as a
-- transformative journal. is_transformative = 0 otherwise.
DROP TABLE IF EXISTS ${stats_db_name}.indi_pub_in_transformative PURGE; /* EOS */
CREATE TABLE IF NOT EXISTS ${stats_db_name}.indi_pub_in_transformative STORED AS PARQUET AS
SELECT DISTINCT
    pd.id,
    COALESCE(tmp.is_transformative, 0) AS is_transformative
FROM ${stats_db_name}.publication pd
LEFT OUTER JOIN (
    SELECT
        pd.id,
        1 AS is_transformative
    FROM ${stats_db_name}.publication_datasources pd
    JOIN ${stats_db_name}.datasource d ON d.id = pd.datasource
    -- the ISSN match is the join key, so it is stated in ON instead of
    -- relying on a WHERE filter over an unconditioned join
    JOIN STATS_EXT.plan_s_jn ps
        ON ps.issn_print = d.issn_printed
       AND ps.issn_online = d.issn_online
    WHERE ps.is_transformative_journal = TRUE
) tmp ON pd.id = tmp.id; /* EOS */
2021-11-26 14:22:04 +01:00
2024-01-26 01:04:48 +01:00
-- indi_pub_closed_other_open: one row per id found in result_instance.
-- pub_closed_other_open = 1 when the id is a publication with a
-- 'Closed Access' instance hosted by a datasource whose type contains
-- 'Journal', while the best licence of the publication is 'Open Access' or
-- 'Open Source'. pub_closed_other_open = 0 otherwise.
DROP TABLE IF EXISTS ${stats_db_name}.indi_pub_closed_other_open PURGE; /* EOS */
CREATE TABLE IF NOT EXISTS ${stats_db_name}.indi_pub_closed_other_open STORED AS PARQUET AS
SELECT DISTINCT
    ri.id,
    COALESCE(tmp.pub_closed_other_open, 0) AS pub_closed_other_open
FROM ${stats_db_name}.result_instance ri
LEFT OUTER JOIN (
    SELECT
        ri.id,
        1 AS pub_closed_other_open
    FROM ${stats_db_name}.result_instance ri
    JOIN ${stats_db_name}.publication p ON p.id = ri.id
    JOIN ${stats_db_name}.datasource d ON ri.hostedby = d.id
    WHERE d.type LIKE '%Journal%'
      AND ri.accessright = 'Closed Access'
      AND p.bestlicence IN ('Open Access', 'Open Source')
) tmp ON tmp.id = ri.id; /* EOS */
2021-12-20 18:23:57 +01:00
2021-12-21 14:54:38 +01:00
---- Sprint 5 ----
2024-01-26 01:04:48 +01:00
-- indi_result_no_of_copies: number of result_instance rows per result id.
DROP TABLE IF EXISTS ${stats_db_name}.indi_result_no_of_copies PURGE; /* EOS */
CREATE TABLE IF NOT EXISTS ${stats_db_name}.indi_result_no_of_copies STORED AS PARQUET AS
SELECT
    id,
    COUNT(id) AS number_of_copies
FROM ${stats_db_name}.result_instance
GROUP BY
    id; /* EOS */
2022-02-17 09:21:09 +01:00
2022-08-02 12:39:34 +02:00
---- Sprint 6 ----
2024-01-26 01:04:48 +01:00
-- indi_pub_downloads: total downloads per publication, taken from the usage
-- statistics rows that report at least one download.
DROP TABLE IF EXISTS ${stats_db_name}.indi_pub_downloads PURGE; /* EOS */
CREATE TABLE IF NOT EXISTS ${stats_db_name}.indi_pub_downloads STORED AS PARQUET AS
SELECT
    result_id,
    SUM(downloads) AS no_downloads
FROM openaire_prod_usage_stats.usage_stats
-- the inner join keeps only usage rows whose result_id is a publication id
JOIN ${stats_db_name}.publication ON result_id = id
WHERE downloads > 0
GROUP BY
    result_id; /* EOS */
2022-04-06 11:40:02 +02:00
2024-01-26 01:04:48 +01:00
-- indi_pub_downloads_datasource: total downloads per publication and
-- repository, taken from usage rows that report at least one download.
DROP TABLE IF EXISTS ${stats_db_name}.indi_pub_downloads_datasource PURGE; /* EOS */
CREATE TABLE IF NOT EXISTS ${stats_db_name}.indi_pub_downloads_datasource STORED AS PARQUET AS
SELECT
    result_id,
    repository_id,
    SUM(downloads) AS no_downloads
FROM openaire_prod_usage_stats.usage_stats
-- the inner join keeps only usage rows whose result_id is a publication id
JOIN ${stats_db_name}.publication ON result_id = id
WHERE downloads > 0
GROUP BY
    result_id,
    repository_id; /* EOS */
2022-02-17 09:21:09 +01:00
2024-01-26 01:04:48 +01:00
-- indi_pub_downloads_year: total downloads per publication and year.
-- The year is the first four characters of the usage date, cast to int.
DROP TABLE IF EXISTS ${stats_db_name}.indi_pub_downloads_year PURGE; /* EOS */
CREATE TABLE IF NOT EXISTS ${stats_db_name}.indi_pub_downloads_year STORED AS PARQUET AS
SELECT
    result_id,
    CAST(SUBSTRING(us.`date`, 1, 4) AS INT) AS `year`,
    SUM(downloads) AS no_downloads
FROM openaire_prod_usage_stats.usage_stats us
-- the inner join keeps only usage rows whose result_id is a publication id
JOIN ${stats_db_name}.publication ON result_id = id
WHERE downloads > 0
GROUP BY
    result_id,
    SUBSTRING(us.`date`, 1, 4); /* EOS */
2022-02-17 09:21:09 +01:00
2024-01-26 01:04:48 +01:00
-- indi_pub_downloads_datasource_year: total downloads per publication,
-- repository and year. The year is the first four characters of the usage
-- date, cast to int.
DROP TABLE IF EXISTS ${stats_db_name}.indi_pub_downloads_datasource_year PURGE; /* EOS */
CREATE TABLE IF NOT EXISTS ${stats_db_name}.indi_pub_downloads_datasource_year STORED AS PARQUET AS
SELECT
    result_id,
    CAST(SUBSTRING(us.`date`, 1, 4) AS INT) AS `year`,
    repository_id,
    SUM(downloads) AS no_downloads
FROM openaire_prod_usage_stats.usage_stats us
-- the inner join keeps only usage rows whose result_id is a publication id
JOIN ${stats_db_name}.publication ON result_id = id
WHERE downloads > 0
GROUP BY
    result_id,
    repository_id,
    SUBSTRING(us.`date`, 1, 4); /* EOS */
2022-04-06 11:40:02 +02:00
2022-08-02 12:39:34 +02:00
---- Sprint 7 ----
2024-01-26 01:04:48 +01:00
-- indi_pub_gold_oa: one row per id found in publication_datasources.
-- is_gold = 1 when one of its datasources is a gold OA venue (see dd) and the
-- result has a 'gold' access route. is_gold = 0 otherwise.
DROP TABLE IF EXISTS ${stats_db_name}.indi_pub_gold_oa PURGE; /* EOS */
CREATE TABLE IF NOT EXISTS ${stats_db_name}.indi_pub_gold_oa STORED AS PARQUET AS
WITH gold_oa AS (
    -- every ISSN (linking or plain) of a journal listed in the gold OA dataset
    -- or flagged as DOAJ / OA in alljournals
    SELECT DISTINCT issn
    FROM (
        SELECT issn_l AS issn
        FROM stats_ext.issn_gold_oa_dataset_v5
        UNION ALL
        SELECT issn AS issn
        FROM stats_ext.issn_gold_oa_dataset_v5
        UNION ALL
        SELECT issn
        FROM stats_ext.alljournals
        WHERE journal_is_in_doaj = TRUE OR journal_is_oa = TRUE
        UNION ALL
        SELECT issn_l AS issn
        FROM stats_ext.alljournals
        WHERE journal_is_in_doaj = TRUE OR journal_is_oa = TRUE
    ) foo
),
dd AS (
    -- datasources whose id contains 'doajarticles', plus datasources whose
    -- printed or online ISSN is a gold OA ISSN
    SELECT DISTINCT id, issn
    FROM (
        SELECT id, issn_printed AS issn
        FROM ${stats_db_name}.datasource d
        WHERE d.id LIKE '%doajarticles%'
        UNION ALL
        SELECT id, issn_online AS issn
        FROM ${stats_db_name}.datasource d
        WHERE d.id LIKE '%doajarticles%'
        UNION ALL
        SELECT id, issn_printed AS issn
        FROM ${stats_db_name}.datasource d
        JOIN gold_oa ON gold_oa.issn = d.issn_printed
        UNION ALL
        SELECT id, issn_online AS issn
        FROM ${stats_db_name}.datasource d
        JOIN gold_oa ON gold_oa.issn = d.issn_online
    ) foo
)
SELECT DISTINCT
    pd.id,
    COALESCE(tmp.is_gold, 0) AS is_gold
FROM ${stats_db_name}.publication_datasources pd
LEFT OUTER JOIN (
    SELECT
        pd.id,
        1 AS is_gold
    FROM ${stats_db_name}.publication_datasources pd
    JOIN dd ON dd.id = pd.datasource
    -- Written as an inner join: the accessroute filter below discards
    -- unmatched rows anyway, so an outer join here would be misleading.
    -- NOTE(review): if "gold venue OR gold access route" was intended, this
    -- needs a different query. Confirm with the indicator definition.
    JOIN ${stats_db_name}.result_accessroute ra ON ra.id = pd.id
    WHERE ra.accessroute = 'gold'
) tmp ON tmp.id = pd.id; /* EOS */
2023-10-09 13:00:50 +02:00
2024-01-26 01:04:48 +01:00
-- indi_pub_hybrid_oa_with_cc: one row per id found in publication_datasources.
-- is_hybrid_oa = 1 when one of its datasources has an ISSN of a plan_s_jn
-- journal that is not both in DOAJ and OA, the result carries a CC licence
-- (indi_result_has_cc_licence) and it is not gold (indi_pub_gold_oa).
-- is_hybrid_oa = 0 otherwise.
-- Reads indi_result_has_cc_licence and indi_pub_gold_oa, which therefore must
-- exist before this statement runs.
DROP TABLE IF EXISTS ${stats_db_name}.indi_pub_hybrid_oa_with_cc PURGE; /* EOS */
CREATE TABLE IF NOT EXISTS ${stats_db_name}.indi_pub_hybrid_oa_with_cc STORED AS PARQUET AS
WITH hybrid_oa AS (
    -- Print and online ISSNs of journals that are not fully OA. The doaj/oa
    -- filter applies to BOTH branches. Filtering only the online branch would
    -- let fully OA journals in through their print ISSN.
    SELECT issn_l, journal_is_in_doaj, journal_is_oa, issn_print AS issn
    FROM STATS_EXT.plan_s_jn
    WHERE issn_print != ''
      AND (journal_is_in_doaj = FALSE OR journal_is_oa = FALSE)
    UNION ALL
    SELECT issn_l, journal_is_in_doaj, journal_is_oa, issn_online AS issn
    FROM STATS_EXT.plan_s_jn
    WHERE issn_online != ''
      AND (journal_is_in_doaj = FALSE OR journal_is_oa = FALSE)
),
datasource_issn AS (
    -- one row per datasource and non-null ISSN, keeping only values longer
    -- than 7 characters
    SELECT ds.id, ds.issn
    FROM (
        SELECT id, issn_printed AS issn
        FROM ${stats_db_name}.datasource
        WHERE issn_printed IS NOT NULL
        UNION ALL
        SELECT id, issn_online AS issn
        FROM ${stats_db_name}.datasource
        WHERE issn_online IS NOT NULL
    ) ds
    WHERE LENGTH(ds.issn) > 7
)
SELECT DISTINCT
    pd.id,
    COALESCE(tmp.is_hybrid_oa, 0) AS is_hybrid_oa
FROM ${stats_db_name}.publication_datasources pd
LEFT OUTER JOIN (
    SELECT
        pd.id,
        1 AS is_hybrid_oa
    FROM ${stats_db_name}.publication_datasources pd
    -- datasource_issn is derived from datasource, so a separate join to the
    -- datasource table adds nothing
    JOIN datasource_issn ON datasource_issn.id = pd.datasource
    JOIN hybrid_oa ON datasource_issn.issn = hybrid_oa.issn
    JOIN ${stats_db_name}.indi_result_has_cc_licence cc ON pd.id = cc.id
    JOIN ${stats_db_name}.indi_pub_gold_oa ga ON pd.id = ga.id
    WHERE cc.has_cc_license = 1
      AND ga.is_gold = 0
) tmp ON pd.id = tmp.id; /* EOS */
2023-06-07 16:43:37 +02:00
2024-01-26 01:04:48 +01:00
-- indi_pub_hybrid: one row per publication.
-- is_hybrid = 1 when the publication is not gold (indi_pub_gold_oa) and either
--   * it has a licensed instance hosted by a datasource whose type contains
--     'Journal' and whose access right is neither 'Closed Access' nor
--     'Restricted', or
--   * it has a 'hybrid' access route.
-- is_hybrid = 0 otherwise. Publications without any result_accessroute row
-- never qualify, because that table is inner-joined.
DROP TABLE IF EXISTS ${stats_db_name}.indi_pub_hybrid PURGE; /* EOS */
CREATE TABLE IF NOT EXISTS ${stats_db_name}.indi_pub_hybrid STORED AS PARQUET AS
SELECT DISTINCT
    pd.id,
    COALESCE(tmp.is_hybrid, 0) AS is_hybrid
FROM ${stats_db_name}.publication pd
LEFT OUTER JOIN (
    SELECT
        pd.id,
        1 AS is_hybrid
    FROM ${stats_db_name}.publication pd
    JOIN ${stats_db_name}.result_instance ri ON ri.id = pd.id
    JOIN ${stats_db_name}.indi_pub_gold_oa indi_gold ON indi_gold.id = pd.id
    JOIN ${stats_db_name}.result_accessroute ra ON ra.id = pd.id
    JOIN ${stats_db_name}.datasource d ON d.id = ri.hostedby
    WHERE indi_gold.is_gold = 0
      AND (
              (
                  d.type LIKE '%Journal%'
                  AND ri.accessright != 'Closed Access'
                  AND ri.accessright != 'Restricted'
                  AND ri.license IS NOT NULL
              )
              OR ra.accessroute = 'hybrid'
          )
) tmp ON pd.id = tmp.id; /* EOS */
2023-10-09 13:00:50 +02:00
2024-01-26 01:04:48 +01:00
-- indi_org_fairness: per organization, the share of its results published
-- after 2003 that carry rich metadata. Organizations without any such
-- result are absent, because the final join is an inner join.
DROP TABLE IF EXISTS ${stats_db_name}.indi_org_fairness PURGE; /* EOS */
CREATE TABLE IF NOT EXISTS ${stats_db_name}.indi_org_fairness STORED AS PARQUET AS
WITH
-- results with title, publisher, abstract, year and at least one author,
-- counted per organization (PID presence is not checked here)
result_fair AS (
    SELECT
        ro.organization AS organization,
        COUNT(DISTINCT ro.id) AS no_result_fair
    FROM ${stats_db_name}.result_organization ro
    JOIN ${stats_db_name}.result r ON r.id = ro.id
    WHERE (title IS NOT NULL)
      AND (publisher IS NOT NULL)
      AND (abstract = TRUE)
      AND (year IS NOT NULL)
      AND (authors > 0)
      AND CAST(year AS INT) > 2003
    GROUP BY ro.organization
),
-- all results after 2003, counted per organization
allresults AS (
    SELECT
        ro.organization,
        COUNT(DISTINCT ro.id) AS no_allresults
    FROM ${stats_db_name}.result_organization ro
    JOIN ${stats_db_name}.result r ON r.id = ro.id
    WHERE CAST(year AS INT) > 2003
    GROUP BY ro.organization
)
-- ratio of fair results to all results
SELECT
    allresults.organization,
    result_fair.no_result_fair / allresults.no_allresults AS org_fairness
FROM allresults
JOIN result_fair ON result_fair.organization = allresults.organization; /* EOS */
2022-08-02 12:39:34 +02:00
2024-01-26 20:19:52 +01:00
-- indi_org_fairness_pub_pr: per organization, the share of its publications
-- after 2003 that have rich metadata, a DOI from Crossref and are not grey
-- literature. Built from two temporary views that are dropped afterwards.
-- Reads indi_pub_doi_from_crossref and indi_pub_grey_lit.

-- publications meeting all criteria, counted per organization
CREATE TEMPORARY VIEW result_fair AS
SELECT
    ro.organization AS organization,
    COUNT(DISTINCT ro.id) AS no_result_fair
FROM ${stats_db_name}.result_organization ro
JOIN ${stats_db_name}.publication p ON p.id = ro.id
JOIN ${stats_db_name}.indi_pub_doi_from_crossref dc ON dc.id = p.id
JOIN ${stats_db_name}.indi_pub_grey_lit gl ON gl.id = p.id
WHERE (title IS NOT NULL)
  AND (publisher IS NOT NULL)
  AND (abstract = TRUE)
  AND (year IS NOT NULL)
  AND (authors > 0)
  AND CAST(year AS INT) > 2003
  AND dc.doi_from_crossref = 1
  AND gl.grey_lit = 0
GROUP BY ro.organization; /* EOS */

-- all publications after 2003, counted per organization
CREATE TEMPORARY VIEW allresults AS
SELECT
    ro.organization,
    COUNT(DISTINCT ro.id) AS no_allresults
FROM ${stats_db_name}.result_organization ro
JOIN ${stats_db_name}.publication p ON p.id = ro.id
WHERE CAST(year AS INT) > 2003
GROUP BY ro.organization; /* EOS */

DROP TABLE IF EXISTS ${stats_db_name}.indi_org_fairness_pub_pr PURGE; /* EOS */

CREATE TABLE IF NOT EXISTS ${stats_db_name}.indi_org_fairness_pub_pr STORED AS PARQUET AS
SELECT
    ar.organization,
    rf.no_result_fair / ar.no_allresults AS org_fairness
FROM allresults ar
JOIN result_fair rf ON rf.organization = ar.organization; /* EOS */

-- each statement carries the EOS marker, like every other statement in the file
DROP VIEW result_fair; /* EOS */
DROP VIEW allresults; /* EOS */
2024-01-26 20:19:52 +01:00
-- indi_org_fairness_pub_year: per organization and year, the share of its
-- results after 2003 that carry rich metadata. Built from two temporary
-- views that are dropped afterwards.

-- results with title, publisher, abstract, year and at least one author,
-- counted per organization and year
CREATE TEMPORARY VIEW result_fair AS
SELECT
    year,
    ro.organization AS organization,
    COUNT(DISTINCT ro.id) AS no_result_fair
FROM ${stats_db_name}.result_organization ro
JOIN ${stats_db_name}.result p ON p.id = ro.id
WHERE (title IS NOT NULL)
  AND (publisher IS NOT NULL)
  AND (abstract = TRUE)
  AND (year IS NOT NULL)
  AND (authors > 0)
  AND CAST(year AS INT) > 2003
GROUP BY ro.organization, year; /* EOS */

-- all results after 2003, counted per organization and year
CREATE TEMPORARY VIEW allresults AS
SELECT
    year,
    ro.organization,
    COUNT(DISTINCT ro.id) AS no_allresults
FROM ${stats_db_name}.result_organization ro
JOIN ${stats_db_name}.result p ON p.id = ro.id
WHERE CAST(year AS INT) > 2003
GROUP BY ro.organization, year; /* EOS */

DROP TABLE IF EXISTS ${stats_db_name}.indi_org_fairness_pub_year PURGE; /* EOS */

CREATE TABLE IF NOT EXISTS ${stats_db_name}.indi_org_fairness_pub_year STORED AS PARQUET AS
SELECT
    CAST(allresults.year AS INT) AS year,
    allresults.organization,
    result_fair.no_result_fair / allresults.no_allresults AS org_fairness
FROM allresults
JOIN result_fair
    ON result_fair.organization = allresults.organization
   AND result_fair.year = allresults.year; /* EOS */

DROP VIEW result_fair; /* EOS */
DROP VIEW allresults; /* EOS */
2023-01-04 10:39:01 +01:00
2024-01-26 20:19:52 +01:00
-- indi_org_fairness_pub: per organization, the share of its results after
-- 2003 that carry rich metadata. Built from two temporary views that are
-- dropped afterwards.

-- results with title, publisher, abstract, year and at least one author,
-- counted per organization
CREATE TEMPORARY VIEW result_fair AS
SELECT
    ro.organization AS organization,
    COUNT(DISTINCT ro.id) AS no_result_fair
FROM ${stats_db_name}.result_organization ro
JOIN ${stats_db_name}.result p ON p.id = ro.id
WHERE (title IS NOT NULL)
  AND (publisher IS NOT NULL)
  AND (abstract = TRUE)
  AND (year IS NOT NULL)
  AND (authors > 0)
  AND CAST(year AS INT) > 2003
GROUP BY ro.organization; /* EOS */

-- all results after 2003, counted per organization
CREATE TEMPORARY VIEW allresults AS
SELECT
    ro.organization,
    COUNT(DISTINCT ro.id) AS no_allresults
FROM ${stats_db_name}.result_organization ro
JOIN ${stats_db_name}.result p ON p.id = ro.id
WHERE CAST(year AS INT) > 2003
GROUP BY ro.organization; /* EOS */

DROP TABLE IF EXISTS ${stats_db_name}.indi_org_fairness_pub PURGE; /* EOS */

-- stored as parquet, like every other indicator table in this script
CREATE TABLE IF NOT EXISTS ${stats_db_name}.indi_org_fairness_pub STORED AS PARQUET AS
SELECT
    ar.organization,
    rf.no_result_fair / ar.no_allresults AS org_fairness
FROM allresults ar
JOIN result_fair rf ON rf.organization = ar.organization; /* EOS */

DROP VIEW result_fair; /* EOS */
DROP VIEW allresults; /* EOS */
2022-08-02 12:39:34 +02:00
2024-01-26 20:19:52 +01:00
-- indi_org_fairness_year: per organization and year, the share of its
-- results after 2003 that have a PID and rich metadata. Built from two
-- temporary views that are dropped afterwards.

-- results that appear in result_pids and have title, publisher, abstract,
-- year and at least one author, counted per organization and year
CREATE TEMPORARY VIEW result_fair AS
SELECT
    year,
    ro.organization AS organization,
    COUNT(DISTINCT ro.id) AS no_result_fair
FROM ${stats_db_name}.result_organization ro
JOIN ${stats_db_name}.result r ON r.id = ro.id
JOIN ${stats_db_name}.result_pids rp ON r.id = rp.id
WHERE (title IS NOT NULL)
  AND (publisher IS NOT NULL)
  AND (abstract = TRUE)
  AND (year IS NOT NULL)
  AND (authors > 0)
  AND CAST(year AS INT) > 2003
GROUP BY ro.organization, year; /* EOS */

-- all results after 2003, counted per organization and year
CREATE TEMPORARY VIEW allresults AS
SELECT
    year,
    ro.organization,
    COUNT(DISTINCT ro.id) AS no_allresults
FROM ${stats_db_name}.result_organization ro
JOIN ${stats_db_name}.result r ON r.id = ro.id
WHERE CAST(year AS INT) > 2003
GROUP BY ro.organization, year; /* EOS */

DROP TABLE IF EXISTS ${stats_db_name}.indi_org_fairness_year PURGE; /* EOS */

CREATE TABLE IF NOT EXISTS ${stats_db_name}.indi_org_fairness_year STORED AS PARQUET AS
SELECT
    CAST(allresults.year AS INT) AS year,
    allresults.organization,
    result_fair.no_result_fair / allresults.no_allresults AS org_fairness
FROM allresults
JOIN result_fair
    ON result_fair.organization = allresults.organization
   AND CAST(result_fair.year AS INT) = CAST(allresults.year AS INT); /* EOS */

DROP VIEW result_fair; /* EOS */
DROP VIEW allresults; /* EOS */
2023-01-04 10:39:01 +01:00
2024-01-26 20:19:52 +01:00
-- indi_org_findable_year: per organization and year, the share of its
-- results after 2003 that appear in result_pids. Built from two temporary
-- views that are dropped afterwards.

-- results with at least one PID, counted per organization and year
CREATE TEMPORARY VIEW result_with_pid AS
SELECT
    year,
    ro.organization,
    COUNT(DISTINCT rp.id) AS no_result_with_pid
FROM ${stats_db_name}.result_organization ro
JOIN ${stats_db_name}.result_pids rp ON rp.id = ro.id
JOIN ${stats_db_name}.result r ON r.id = rp.id
WHERE CAST(year AS INT) > 2003
GROUP BY ro.organization, year; /* EOS */

-- all results after 2003, counted per organization and year
CREATE TEMPORARY VIEW allresults AS
SELECT
    year,
    ro.organization,
    COUNT(DISTINCT ro.id) AS no_allresults
FROM ${stats_db_name}.result_organization ro
JOIN ${stats_db_name}.result r ON r.id = ro.id
WHERE CAST(year AS INT) > 2003
GROUP BY ro.organization, year; /* EOS */

DROP TABLE IF EXISTS ${stats_db_name}.indi_org_findable_year PURGE; /* EOS */

CREATE TABLE IF NOT EXISTS ${stats_db_name}.indi_org_findable_year STORED AS PARQUET AS
SELECT
    CAST(allresults.year AS INT) AS year,
    allresults.organization,
    result_with_pid.no_result_with_pid / allresults.no_allresults AS org_findable
FROM allresults
JOIN result_with_pid
    ON result_with_pid.organization = allresults.organization
   AND CAST(result_with_pid.year AS INT) = CAST(allresults.year AS INT); /* EOS */

DROP VIEW result_with_pid; /* EOS */
DROP VIEW allresults; /* EOS */
2023-01-04 10:39:01 +01:00
2024-01-26 20:19:52 +01:00
-- indi_org_findable: per organization, the share of its results after 2003
-- that appear in result_pids. Built from two temporary views that are
-- dropped afterwards.

-- results with at least one PID, counted per organization
CREATE TEMPORARY VIEW result_with_pid AS
SELECT
    ro.organization,
    COUNT(DISTINCT rp.id) AS no_result_with_pid
FROM ${stats_db_name}.result_organization ro
JOIN ${stats_db_name}.result_pids rp ON rp.id = ro.id
JOIN ${stats_db_name}.result r ON r.id = rp.id
WHERE CAST(year AS INT) > 2003
GROUP BY ro.organization; /* EOS */

-- all results after 2003, counted per organization
CREATE TEMPORARY VIEW allresults AS
SELECT
    ro.organization,
    COUNT(DISTINCT ro.id) AS no_allresults
FROM ${stats_db_name}.result_organization ro
JOIN ${stats_db_name}.result r ON r.id = ro.id
WHERE CAST(year AS INT) > 2003
GROUP BY ro.organization; /* EOS */

DROP TABLE IF EXISTS ${stats_db_name}.indi_org_findable PURGE; /* EOS */

CREATE TABLE IF NOT EXISTS ${stats_db_name}.indi_org_findable STORED AS PARQUET AS
SELECT
    allresults.organization,
    result_with_pid.no_result_with_pid / allresults.no_allresults AS org_findable
FROM allresults
JOIN result_with_pid ON result_with_pid.organization = allresults.organization; /* EOS */

DROP VIEW result_with_pid; /* EOS */
DROP VIEW allresults; /* EOS */
2022-08-02 12:39:34 +02:00
2024-01-26 20:19:52 +01:00
-- Open-access counts per organization, one view per result type.
-- A result counts when any of its instances has access right
-- 'Open Access', 'Embargo' or 'Open Source'; only years after 2003 are included.

CREATE TEMPORARY VIEW pubs_oa AS
SELECT
    ro.organization,
    COUNT(DISTINCT r.id) AS no_oapubs
FROM ${stats_db_name}.publication AS r
INNER JOIN ${stats_db_name}.result_organization AS ro
    ON ro.id = r.id
INNER JOIN ${stats_db_name}.result_instance AS ri
    ON ri.id = r.id
WHERE ri.accessright IN ('Open Access', 'Embargo', 'Open Source')
  AND CAST(r.year AS INT) > 2003
GROUP BY ro.organization; /* EOS */

CREATE TEMPORARY VIEW datasets_oa AS
SELECT
    ro.organization,
    COUNT(DISTINCT r.id) AS no_oadatasets
FROM ${stats_db_name}.dataset AS r
INNER JOIN ${stats_db_name}.result_organization AS ro
    ON ro.id = r.id
INNER JOIN ${stats_db_name}.result_instance AS ri
    ON ri.id = r.id
WHERE ri.accessright IN ('Open Access', 'Embargo', 'Open Source')
  AND CAST(r.year AS INT) > 2003
GROUP BY ro.organization; /* EOS */

CREATE TEMPORARY VIEW software_oa AS
SELECT
    ro.organization,
    COUNT(DISTINCT r.id) AS no_oasoftware
FROM ${stats_db_name}.software AS r
INNER JOIN ${stats_db_name}.result_organization AS ro
    ON ro.id = r.id
INNER JOIN ${stats_db_name}.result_instance AS ri
    ON ri.id = r.id
WHERE ri.accessright IN ('Open Access', 'Embargo', 'Open Source')
  AND CAST(r.year AS INT) > 2003
GROUP BY ro.organization; /* EOS */
2023-01-04 10:39:01 +01:00
2024-01-26 20:19:52 +01:00
-- Total counts per organization (denominators of the openness shares),
-- one view per result type, restricted to years after 2003.
-- NOTE(review): these use COUNT(ro.id), not COUNT(DISTINCT ...), so they count
-- result_organization rows; confirm (id, organization) is unique in that table.

CREATE TEMPORARY VIEW allpubs AS
SELECT
    ro.organization,
    COUNT(ro.id) AS no_allpubs
FROM ${stats_db_name}.result_organization AS ro
INNER JOIN ${stats_db_name}.publication AS pub
    ON pub.id = ro.id
WHERE CAST(pub.year AS INT) > 2003
GROUP BY ro.organization; /* EOS */

CREATE TEMPORARY VIEW alldatasets AS
SELECT
    ro.organization,
    COUNT(ro.id) AS no_alldatasets
FROM ${stats_db_name}.result_organization AS ro
INNER JOIN ${stats_db_name}.dataset AS ds
    ON ds.id = ro.id
WHERE CAST(ds.year AS INT) > 2003
GROUP BY ro.organization; /* EOS */

CREATE TEMPORARY VIEW allsoftware AS
SELECT
    ro.organization,
    COUNT(ro.id) AS no_allsoftware
FROM ${stats_db_name}.result_organization AS ro
INNER JOIN ${stats_db_name}.software AS sw
    ON sw.id = ro.id
WHERE CAST(sw.year AS INT) > 2003
GROUP BY ro.organization; /* EOS */
2023-01-04 10:39:01 +01:00
2024-01-26 20:19:52 +01:00
-- Open-access share per organization and result type: OA count / total count.
-- Each view keeps only organizations present in both its OA and its total view.

-- p: share of open-access publications.
CREATE TEMPORARY VIEW allpubsshare AS
SELECT
    oa.organization,
    oa.no_oapubs / tot.no_allpubs AS p
FROM allpubs AS tot
INNER JOIN pubs_oa AS oa
    ON tot.organization = oa.organization; /* EOS */

-- d: share of open-access datasets.
CREATE TEMPORARY VIEW alldatasetssshare AS
SELECT
    oa.organization,
    oa.no_oadatasets / tot.no_alldatasets AS d
FROM alldatasets AS tot
INNER JOIN datasets_oa AS oa
    ON tot.organization = oa.organization; /* EOS */

-- s: share of open-access software.
CREATE TEMPORARY VIEW allsoftwaresshare AS
SELECT
    oa.organization,
    oa.no_oasoftware / tot.no_allsoftware AS s
FROM allsoftware AS tot
INNER JOIN software_oa AS oa
    ON tot.organization = oa.organization; /* EOS */
2023-01-04 10:39:01 +01:00
2024-01-26 01:04:48 +01:00
-- Rebuild the organization openness indicator.
DROP TABLE IF EXISTS ${stats_db_name}.indi_org_openess PURGE; /* EOS */

-- org_openess is the mean of the available shares: the publication share p is
-- always present (driving table), while the dataset share d and software share s
-- contribute to numerator and denominator only when the organization has one.
CREATE TABLE IF NOT EXISTS ${stats_db_name}.indi_org_openess STORED AS PARQUET AS
SELECT
    allpubsshare.organization,
    (allpubsshare.p + COALESCE(sw.s, 0) + COALESCE(ds.d, 0))
        / (1
           + (CASE WHEN sw.s IS NULL THEN 0 ELSE 1 END)
           + (CASE WHEN ds.d IS NULL THEN 0 ELSE 1 END)) AS org_openess
FROM allpubsshare
LEFT JOIN alldatasetssshare AS ds
    ON ds.organization = allpubsshare.organization
LEFT JOIN allsoftwaresshare AS sw
    ON sw.organization = allpubsshare.organization; /* EOS */

-- Drop every helper view; the same names are re-created by later statements.
DROP VIEW pubs_oa; /* EOS */
DROP VIEW datasets_oa; /* EOS */
DROP VIEW software_oa; /* EOS */
DROP VIEW allpubs; /* EOS */
DROP VIEW alldatasets; /* EOS */
DROP VIEW allsoftware; /* EOS */
DROP VIEW allpubsshare; /* EOS */
DROP VIEW alldatasetssshare; /* EOS */
DROP VIEW allsoftwaresshare; /* EOS */
-- Open-access counts per organization AND year, one view per result type.
-- Same access-right filter and post-2003 cutoff as the organization-level views.

CREATE TEMPORARY VIEW pubs_oa AS
SELECT
    r.year,
    ro.organization,
    COUNT(DISTINCT r.id) AS no_oapubs
FROM ${stats_db_name}.publication AS r
INNER JOIN ${stats_db_name}.result_organization AS ro
    ON ro.id = r.id
INNER JOIN ${stats_db_name}.result_instance AS ri
    ON ri.id = r.id
WHERE ri.accessright IN ('Open Access', 'Embargo', 'Open Source')
  AND CAST(r.year AS INT) > 2003
GROUP BY ro.organization, r.year; /* EOS */

CREATE TEMPORARY VIEW datasets_oa AS
SELECT
    r.year,
    ro.organization,
    COUNT(DISTINCT r.id) AS no_oadatasets
FROM ${stats_db_name}.dataset AS r
INNER JOIN ${stats_db_name}.result_organization AS ro
    ON ro.id = r.id
INNER JOIN ${stats_db_name}.result_instance AS ri
    ON ri.id = r.id
WHERE ri.accessright IN ('Open Access', 'Embargo', 'Open Source')
  AND CAST(r.year AS INT) > 2003
GROUP BY ro.organization, r.year; /* EOS */

CREATE TEMPORARY VIEW software_oa AS
SELECT
    r.year,
    ro.organization,
    COUNT(DISTINCT r.id) AS no_oasoftware
FROM ${stats_db_name}.software AS r
INNER JOIN ${stats_db_name}.result_organization AS ro
    ON ro.id = r.id
INNER JOIN ${stats_db_name}.result_instance AS ri
    ON ri.id = r.id
WHERE ri.accessright IN ('Open Access', 'Embargo', 'Open Source')
  AND CAST(r.year AS INT) > 2003
GROUP BY ro.organization, r.year; /* EOS */
2023-01-04 10:39:01 +01:00
2024-01-26 20:19:52 +01:00
-- Total counts per organization AND year (denominators of the yearly shares),
-- one view per result type, restricted to years after 2003.

CREATE TEMPORARY VIEW allpubs AS
SELECT
    pub.year,
    ro.organization AS organization,
    COUNT(ro.id) AS no_allpubs
FROM ${stats_db_name}.result_organization AS ro
INNER JOIN ${stats_db_name}.publication AS pub
    ON pub.id = ro.id
WHERE CAST(pub.year AS INT) > 2003
GROUP BY ro.organization, pub.year; /* EOS */

CREATE TEMPORARY VIEW alldatasets AS
SELECT
    ds.year,
    ro.organization AS organization,
    COUNT(ro.id) AS no_alldatasets
FROM ${stats_db_name}.result_organization AS ro
INNER JOIN ${stats_db_name}.dataset AS ds
    ON ds.id = ro.id
WHERE CAST(ds.year AS INT) > 2003
GROUP BY ro.organization, ds.year; /* EOS */

CREATE TEMPORARY VIEW allsoftware AS
SELECT
    sw.year,
    ro.organization AS organization,
    COUNT(ro.id) AS no_allsoftware
FROM ${stats_db_name}.result_organization AS ro
INNER JOIN ${stats_db_name}.software AS sw
    ON sw.id = ro.id
WHERE CAST(sw.year AS INT) > 2003
GROUP BY ro.organization, sw.year; /* EOS */
2023-01-04 10:39:01 +01:00
2024-01-26 20:19:52 +01:00
-- Yearly open-access share per organization and result type.
-- OA and total rows are paired on organization and on the year cast to INT;
-- the year equality lives in the ON clause, which is equivalent to a WHERE
-- filter for an inner join.

-- p: yearly share of open-access publications.
CREATE TEMPORARY VIEW allpubsshare AS
SELECT
    tot.year,
    oa.organization,
    oa.no_oapubs / tot.no_allpubs AS p
FROM allpubs AS tot
INNER JOIN pubs_oa AS oa
    ON tot.organization = oa.organization
   AND CAST(tot.year AS INT) = CAST(oa.year AS INT); /* EOS */

-- d: yearly share of open-access datasets.
CREATE TEMPORARY VIEW alldatasetssshare AS
SELECT
    tot.year,
    oa.organization,
    oa.no_oadatasets / tot.no_alldatasets AS d
FROM alldatasets AS tot
INNER JOIN datasets_oa AS oa
    ON tot.organization = oa.organization
   AND CAST(tot.year AS INT) = CAST(oa.year AS INT); /* EOS */

-- s: yearly share of open-access software.
CREATE TEMPORARY VIEW allsoftwaresshare AS
SELECT
    tot.year,
    oa.organization,
    oa.no_oasoftware / tot.no_allsoftware AS s
FROM allsoftware AS tot
INNER JOIN software_oa AS oa
    ON tot.organization = oa.organization
   AND CAST(tot.year AS INT) = CAST(oa.year AS INT); /* EOS */
2023-01-04 10:39:01 +01:00
2024-01-26 01:04:48 +01:00
-- Rebuild the yearly organization openness indicator.
DROP TABLE IF EXISTS ${stats_db_name}.indi_org_openess_year PURGE; /* EOS */

-- org_openess per (organization, year): mean of the available shares. The
-- publication share p is always present (driving table); the dataset share d and
-- software share s count towards numerator and denominator only when present.
--
-- The CAST(year AS INT) columns in the subqueries carry an explicit alias so that
-- tmp1.year / tmp2.year resolve regardless of how the engine names an unaliased
-- CAST expression, and both joins compare integer years in the same way.
CREATE TABLE IF NOT EXISTS ${stats_db_name}.indi_org_openess_year STORED AS PARQUET AS
SELECT
    CAST(allpubsshare.year AS INT) AS year,
    allpubsshare.organization,
    (p + COALESCE(s, 0) + COALESCE(d, 0))
        / (1
           + (CASE WHEN s IS NULL THEN 0 ELSE 1 END)
           + (CASE WHEN d IS NULL THEN 0 ELSE 1 END)) AS org_openess
FROM allpubsshare
LEFT OUTER JOIN (
    SELECT CAST(year AS INT) AS year, organization, d
    FROM alldatasetssshare
) tmp1
    ON tmp1.organization = allpubsshare.organization
   AND tmp1.year = CAST(allpubsshare.year AS INT)
LEFT OUTER JOIN (
    SELECT CAST(year AS INT) AS year, organization, s
    FROM allsoftwaresshare
) tmp2
    ON tmp2.organization = allpubsshare.organization
   AND tmp2.year = CAST(allpubsshare.year AS INT); /* EOS */

-- Drop every helper view; the same names are re-created by later statements.
DROP VIEW pubs_oa; /* EOS */
DROP VIEW datasets_oa; /* EOS */
DROP VIEW software_oa; /* EOS */
DROP VIEW allpubs; /* EOS */
DROP VIEW alldatasets; /* EOS */
DROP VIEW allsoftware; /* EOS */
DROP VIEW allpubsshare; /* EOS */
DROP VIEW alldatasetssshare; /* EOS */
DROP VIEW allsoftwaresshare; /* EOS */
2023-01-04 10:39:01 +01:00
2024-01-26 01:04:48 +01:00
-- Rebuild the "has preprint" flag.
DROP TABLE IF EXISTS ${stats_db_name}.indi_pub_has_preprint PURGE; /* EOS */

-- One row per id found in publication_classifications: has_preprint is 1 when the
-- id has at least one classification of type 'Preprint', otherwise 0.
CREATE TABLE IF NOT EXISTS ${stats_db_name}.indi_pub_has_preprint STORED AS PARQUET AS
WITH preprints AS (
    SELECT
        pc.id,
        1 AS has_preprint
    FROM ${stats_db_name}.publication_classifications AS pc
    WHERE pc.type = 'Preprint'
)
SELECT DISTINCT
    p.id,
    COALESCE(preprints.has_preprint, 0) AS has_preprint
FROM ${stats_db_name}.publication_classifications AS p
LEFT JOIN preprints
    ON p.id = preprints.id; /* EOS */
-- Rebuild the "published in subscription venue" flag.
DROP TABLE IF EXISTS ${stats_db_name}.indi_pub_in_subscribed PURGE; /* EOS */

-- is_subscription is 1 for publications that are flagged 0 in all three of the
-- gold, hybrid and transformative indicator tables; every other publication gets 0.
CREATE TABLE IF NOT EXISTS ${stats_db_name}.indi_pub_in_subscribed STORED AS PARQUET AS
WITH subscribed AS (
    SELECT
        pub.id,
        1 AS is_subscription
    FROM ${stats_db_name}.publication AS pub
    INNER JOIN ${stats_db_name}.indi_pub_gold_oa AS g
        ON pub.id = g.id
    INNER JOIN ${stats_db_name}.indi_pub_hybrid AS h
        ON pub.id = h.id
    INNER JOIN ${stats_db_name}.indi_pub_in_transformative AS t
        ON pub.id = t.id
    WHERE g.is_gold = 0
      AND h.is_hybrid = 0
      AND t.is_transformative = 0
)
SELECT DISTINCT
    p.id,
    COALESCE(subscribed.is_subscription, 0) AS is_subscription
FROM ${stats_db_name}.publication AS p
LEFT JOIN subscribed
    ON p.id = subscribed.id; /* EOS */
2022-08-02 12:39:34 +02:00
2024-01-26 01:04:48 +01:00
-- Rebuild the "result has a PID" flag.
DROP TABLE IF EXISTS ${stats_db_name}.indi_result_with_pid PURGE; /* EOS */

-- One row per result: result_with_pid is 1 when the result id appears in
-- result_pids, otherwise 0. DISTINCT collapses results that have several PIDs.
-- STORED AS PARQUET is stated explicitly so the table uses the same storage
-- format as the other indicator tables instead of the engine default.
CREATE TABLE IF NOT EXISTS ${stats_db_name}.indi_result_with_pid STORED AS PARQUET AS
SELECT DISTINCT
    p.id,
    COALESCE(tmp.result_with_pid, 0) AS result_with_pid
FROM ${stats_db_name}.result AS p
LEFT OUTER JOIN (
    SELECT
        rp.id,
        1 AS result_with_pid
    FROM ${stats_db_name}.result_pids AS rp
) tmp
    ON p.id = tmp.id; /* EOS */
2022-08-02 12:39:34 +02:00
2024-01-26 20:19:52 +01:00
-- Number of distinct lvl3 values recorded in result_fos for each result id.
CREATE TEMPORARY VIEW pub_fos_totals AS
SELECT
    rf.id,
    COUNT(DISTINCT lvl3) AS totals
FROM ${stats_db_name}.result_fos AS rf
GROUP BY rf.id; /* EOS */
2023-06-02 12:34:16 +02:00
2024-01-26 01:04:48 +01:00
-- Rebuild the interdisciplinarity flag.
DROP TABLE IF EXISTS ${stats_db_name}.indi_pub_interdisciplinarity PURGE; /* EOS */

-- One row per id in pub_fos_totals: is_interdisciplinary is 1 when the id has
-- more than one distinct lvl3 value (totals > 1), otherwise 0.
-- STORED AS PARQUET is stated explicitly so the table uses the same storage
-- format as the other indicator tables instead of the engine default.
CREATE TABLE IF NOT EXISTS ${stats_db_name}.indi_pub_interdisciplinarity STORED AS PARQUET AS
SELECT DISTINCT
    p.id AS id,
    COALESCE(tmp.is_interdisciplinary, 0) AS is_interdisciplinary
FROM pub_fos_totals AS p
LEFT OUTER JOIN (
    SELECT
        pub_fos_totals.id,
        1 AS is_interdisciplinary
    FROM pub_fos_totals
    WHERE totals > 1
) tmp
    ON p.id = tmp.id; /* EOS */

-- The helper view is only needed for the table above.
DROP VIEW pub_fos_totals; /* EOS */
2023-06-02 13:13:10 +02:00
2024-01-26 01:04:48 +01:00
-- Rebuild the bronze open-access flag.
DROP TABLE IF EXISTS ${stats_db_name}.indi_pub_bronze_oa PURGE; /* EOS */

-- is_bronze_oa is 1 for publications that are neither gold nor hybrid and that
-- either (a) have an instance hosted by a '%Journal%' datasource whose access
-- right is not 'Closed Access' / 'Restricted' and whose license is NULL, or
-- (b) have accessroute 'bronze' in result_accessroute. All others get 0.
-- NOTE(review): result_accessroute is inner-joined, so a publication without any
-- row there can never qualify through (a) either; confirm this is intended.
CREATE TABLE ${stats_db_name}.indi_pub_bronze_oa STORED AS PARQUET AS
WITH bronze AS (
    SELECT
        pub.id,
        1 AS is_bronze_oa
    FROM ${stats_db_name}.publication AS pub
    INNER JOIN ${stats_db_name}.result_instance AS ri
        ON ri.id = pub.id
    INNER JOIN ${stats_db_name}.indi_pub_gold_oa AS indi_gold
        ON indi_gold.id = pub.id
    INNER JOIN ${stats_db_name}.indi_pub_hybrid AS indi_hybrid
        ON indi_hybrid.id = pub.id
    INNER JOIN ${stats_db_name}.result_accessroute AS ra
        ON ra.id = pub.id
    INNER JOIN ${stats_db_name}.datasource AS d
        ON d.id = ri.hostedby
    WHERE indi_gold.is_gold = 0
      AND indi_hybrid.is_hybrid = 0
      AND (
            (
                d.type LIKE '%Journal%'
                AND ri.accessright NOT IN ('Closed Access', 'Restricted')
                AND ri.license IS NULL
            )
            OR ra.accessroute = 'bronze'
          )
)
SELECT DISTINCT
    pd.id,
    COALESCE(bronze.is_bronze_oa, 0) AS is_bronze_oa
FROM ${stats_db_name}.publication AS pd
LEFT JOIN bronze
    ON pd.id = bronze.id; /* EOS */
2023-06-14 18:09:09 +02:00
2024-01-26 20:19:52 +01:00
-- Every (project, result) link together with the result's year and the project's
-- end year; links where either value is NULL are left out.
CREATE TEMPORARY VIEW project_year_result_year AS
SELECT
    p.id AS project_id,
    acronym,
    r.id AS result_id,
    r.year,
    p.end_year
FROM ${stats_db_name}.project AS p
INNER JOIN ${stats_db_name}.result_projects AS rp
    ON p.id = rp.project
INNER JOIN ${stats_db_name}.result AS r
    ON r.id = rp.id
WHERE p.end_year IS NOT NULL
  AND r.year IS NOT NULL; /* EOS */
2023-09-01 09:57:02 +02:00
2024-01-26 01:04:48 +01:00
-- Rebuild the "result published after project end" flag.
DROP TABLE IF EXISTS ${stats_db_name}.indi_is_project_result_after PURGE; /* EOS */

-- One row per (project, result) link in project_year_result_year:
-- is_project_result_after is 1 when the result's year is greater than that
-- project's end_year, otherwise 0.
--
-- The flag is evaluated on the row itself. A self-join keyed on result_id alone
-- would mark a result as "after" for every project it is linked to as soon as it
-- post-dates any one of them, and would duplicate rows for such results.
CREATE TABLE IF NOT EXISTS ${stats_db_name}.indi_is_project_result_after STORED AS PARQUET AS
SELECT
    pry.project_id,
    pry.acronym,
    pry.result_id,
    CASE WHEN pry.year > pry.end_year THEN 1 ELSE 0 END AS is_project_result_after
FROM project_year_result_year AS pry; /* EOS */

-- The helper view is only needed for the table above.
DROP VIEW project_year_result_year; /* EOS */
2023-09-01 09:57:02 +02:00
2024-01-26 20:19:52 +01:00
-- Rebuild the Plan S funder flag.
DROP TABLE IF EXISTS ${stats_db_name}.indi_is_funder_plan_s PURGE; /* EOS */

-- is_funder_plan_s is 1 for funders whose name equals a
-- c_o_alition_s_organisation_funder value in stats_ext.plan_s_short, otherwise 0.
-- Matching back to the funder table is done by name, not by id.
CREATE TABLE IF NOT EXISTS ${stats_db_name}.indi_is_funder_plan_s STORED AS PARQUET AS
WITH plan_s_funders AS (
    SELECT
        id,
        name,
        1 AS is_funder_plan_s
    FROM ${stats_db_name}.funder
    INNER JOIN stats_ext.plan_s_short
        ON c_o_alition_s_organisation_funder = name
)
SELECT DISTINCT
    f.id,
    f.name,
    COALESCE(plan_s_funders.is_funder_plan_s, 0) AS is_funder_plan_s
FROM ${stats_db_name}.funder AS f
LEFT JOIN plan_s_funders
    ON f.name = plan_s_funders.name; /* EOS */
2023-09-01 09:57:02 +02:00
-- Funder Fairness
DROP TABLE IF EXISTS ${stats_db_name}.indi_funder_fairness PURGE; /* EOS */

-- funder_fairness = results passing the metadata checks / all results, per funder,
-- over results with year after 2003. A result passes when title, publisher and
-- year are not NULL, abstract = true and authors > 0.
-- Funders with no passing result are dropped by the inner join.
CREATE TABLE IF NOT EXISTS ${stats_db_name}.indi_funder_fairness STORED AS PARQUET AS
WITH fair_per_funder AS (
    SELECT
        p.funder AS funder,
        COUNT(DISTINCT rp.id) AS no_result_fair
    FROM ${stats_db_name}.result_projects AS rp
    INNER JOIN ${stats_db_name}.result AS r
        ON r.id = rp.id
    INNER JOIN ${stats_db_name}.project AS p
        ON p.id = rp.project
    WHERE r.title IS NOT NULL
      AND publisher IS NOT NULL
      AND abstract = true
      AND year IS NOT NULL
      AND authors > 0
      AND CAST(year AS INT) > 2003
    GROUP BY p.funder
),
total_per_funder AS (
    SELECT
        p.funder AS funder,
        COUNT(DISTINCT rp.id) AS no_allresults
    FROM ${stats_db_name}.result_projects AS rp
    INNER JOIN ${stats_db_name}.result AS r
        ON r.id = rp.id
    INNER JOIN ${stats_db_name}.project AS p
        ON p.id = rp.project
    WHERE CAST(year AS INT) > 2003
    GROUP BY p.funder
)
SELECT
    total_per_funder.funder,
    fair_per_funder.no_result_fair / total_per_funder.no_allresults AS funder_fairness
FROM total_per_funder
INNER JOIN fair_per_funder
    ON fair_per_funder.funder = total_per_funder.funder; /* EOS */
2023-09-01 09:57:02 +02:00
-- RIs Fairness
DROP TABLE IF EXISTS ${stats_db_name}.indi_ris_fairness PURGE; /* EOS */

-- ris_fairness = results passing the metadata checks / all results, per
-- ri_initiative (the context name reached via concept -> category -> context),
-- over results with year after 2003. A result passes when title, publisher and
-- year are not NULL, abstract = true and authors > 0.
CREATE TABLE IF NOT EXISTS ${stats_db_name}.indi_ris_fairness STORED AS PARQUET AS
WITH result_contexts AS (
    -- Distinct (result id, context name) pairs.
    SELECT DISTINCT
        rc.id,
        context.name AS ri_initiative
    FROM ${stats_db_name}.result_concepts AS rc
    INNER JOIN ${stats_db_name}.concept
        ON concept.id = rc.concept
    INNER JOIN ${stats_db_name}.category
        ON category.id = concept.category
    INNER JOIN ${stats_db_name}.context
        ON context.id = category.context
),
fair_per_ri AS (
    SELECT
        rc.ri_initiative AS ri_initiative,
        COUNT(DISTINCT rc.id) AS no_result_fair
    FROM result_contexts AS rc
    INNER JOIN ${stats_db_name}.result AS r
        ON r.id = rc.id
    WHERE title IS NOT NULL
      AND publisher IS NOT NULL
      AND abstract = true
      AND year IS NOT NULL
      AND authors > 0
      AND CAST(year AS INT) > 2003
    GROUP BY rc.ri_initiative
),
total_per_ri AS (
    SELECT
        rc.ri_initiative AS ri_initiative,
        COUNT(DISTINCT rc.id) AS no_allresults
    FROM result_contexts AS rc
    INNER JOIN ${stats_db_name}.result AS r
        ON r.id = rc.id
    WHERE CAST(year AS INT) > 2003
    GROUP BY rc.ri_initiative
)
SELECT
    total_per_ri.ri_initiative,
    fair_per_ri.no_result_fair / total_per_ri.no_allresults AS ris_fairness
FROM total_per_ri
INNER JOIN fair_per_ri
    ON fair_per_ri.ri_initiative = total_per_ri.ri_initiative; /* EOS */
2023-09-01 09:57:02 +02:00
-- Funder Openness
-- Open-access counts per funder, one view per result type. A result counts when
-- any of its instances has access right 'Open Access', 'Embargo' or 'Open Source';
-- only years after 2003 are included.

CREATE TEMPORARY VIEW pubs_oa AS
SELECT
    p.funder AS funder,
    COUNT(DISTINCT rp.id) AS no_oapubs
FROM ${stats_db_name}.result_projects AS rp
INNER JOIN ${stats_db_name}.project AS p
    ON p.id = rp.project
INNER JOIN ${stats_db_name}.publication AS r
    ON r.id = rp.id
INNER JOIN ${stats_db_name}.result_instance AS ri
    ON ri.id = r.id
WHERE ri.accessright IN ('Open Access', 'Embargo', 'Open Source')
  AND CAST(r.year AS INT) > 2003
GROUP BY p.funder; /* EOS */

CREATE TEMPORARY VIEW datasets_oa AS
SELECT
    p.funder AS funder,
    COUNT(DISTINCT rp.id) AS no_oadatasets
FROM ${stats_db_name}.result_projects AS rp
INNER JOIN ${stats_db_name}.project AS p
    ON p.id = rp.project
INNER JOIN ${stats_db_name}.dataset AS r
    ON r.id = rp.id
INNER JOIN ${stats_db_name}.result_instance AS ri
    ON ri.id = r.id
WHERE ri.accessright IN ('Open Access', 'Embargo', 'Open Source')
  AND CAST(r.year AS INT) > 2003
GROUP BY p.funder; /* EOS */

CREATE TEMPORARY VIEW software_oa AS
SELECT
    p.funder AS funder,
    COUNT(DISTINCT rp.id) AS no_oasoftware
FROM ${stats_db_name}.result_projects AS rp
INNER JOIN ${stats_db_name}.project AS p
    ON p.id = rp.project
INNER JOIN ${stats_db_name}.software AS r
    ON r.id = rp.id
INNER JOIN ${stats_db_name}.result_instance AS ri
    ON ri.id = r.id
WHERE ri.accessright IN ('Open Access', 'Embargo', 'Open Source')
  AND CAST(r.year AS INT) > 2003
GROUP BY p.funder; /* EOS */
2023-09-01 09:57:02 +02:00
2024-01-26 20:19:52 +01:00
-- Total distinct results per funder (denominators of the funder openness shares),
-- one view per result type, restricted to years after 2003.

CREATE TEMPORARY VIEW allpubs AS
SELECT
    p.funder AS funder,
    COUNT(DISTINCT rp.id) AS no_allpubs
FROM ${stats_db_name}.result_projects AS rp
INNER JOIN ${stats_db_name}.project AS p
    ON p.id = rp.project
INNER JOIN ${stats_db_name}.publication AS r
    ON r.id = rp.id
WHERE CAST(r.year AS INT) > 2003
GROUP BY p.funder; /* EOS */

CREATE TEMPORARY VIEW alldatasets AS
SELECT
    p.funder AS funder,
    COUNT(DISTINCT rp.id) AS no_alldatasets
FROM ${stats_db_name}.result_projects AS rp
INNER JOIN ${stats_db_name}.project AS p
    ON p.id = rp.project
INNER JOIN ${stats_db_name}.dataset AS r
    ON r.id = rp.id
WHERE CAST(r.year AS INT) > 2003
GROUP BY p.funder; /* EOS */

CREATE TEMPORARY VIEW allsoftware AS
SELECT
    p.funder AS funder,
    COUNT(DISTINCT rp.id) AS no_allsoftware
FROM ${stats_db_name}.result_projects AS rp
INNER JOIN ${stats_db_name}.project AS p
    ON p.id = rp.project
INNER JOIN ${stats_db_name}.software AS r
    ON r.id = rp.id
WHERE CAST(r.year AS INT) > 2003
GROUP BY p.funder; /* EOS */
2023-09-01 09:57:02 +02:00
2024-01-26 20:19:52 +01:00
-- Open-access share per funder and result type: OA count / total count.
-- Each view keeps only funders present in both its OA and its total view.

-- p: share of open-access publications.
CREATE TEMPORARY VIEW allpubsshare AS
SELECT
    oa.funder,
    oa.no_oapubs / tot.no_allpubs AS p
FROM allpubs AS tot
INNER JOIN pubs_oa AS oa
    ON tot.funder = oa.funder; /* EOS */

-- d: share of open-access datasets.
CREATE TEMPORARY VIEW alldatasetssshare AS
SELECT
    oa.funder,
    oa.no_oadatasets / tot.no_alldatasets AS d
FROM alldatasets AS tot
INNER JOIN datasets_oa AS oa
    ON tot.funder = oa.funder; /* EOS */

-- s: share of open-access software.
CREATE TEMPORARY VIEW allsoftwaresshare AS
SELECT
    oa.funder,
    oa.no_oasoftware / tot.no_allsoftware AS s
FROM allsoftware AS tot
INNER JOIN software_oa AS oa
    ON tot.funder = oa.funder; /* EOS */
2023-09-01 09:57:02 +02:00
2024-01-26 20:19:52 +01:00
-- Rebuild the funder openness indicator.
DROP TABLE IF EXISTS ${stats_db_name}.indi_funder_openess PURGE; /* EOS */

-- funder_openess is the mean of the available shares: the publication share p is
-- always present (driving table), while the dataset share d and software share s
-- contribute to numerator and denominator only when the funder has one.
CREATE TABLE IF NOT EXISTS ${stats_db_name}.indi_funder_openess STORED AS PARQUET AS
SELECT
    allpubsshare.funder,
    (allpubsshare.p + COALESCE(sw.s, 0) + COALESCE(ds.d, 0))
        / (1
           + (CASE WHEN sw.s IS NULL THEN 0 ELSE 1 END)
           + (CASE WHEN ds.d IS NULL THEN 0 ELSE 1 END)) AS funder_openess
FROM allpubsshare
LEFT JOIN alldatasetssshare AS ds
    ON ds.funder = allpubsshare.funder
LEFT JOIN allsoftwaresshare AS sw
    ON sw.funder = allpubsshare.funder; /* EOS */

-- Drop every helper view; the same names are re-created by later statements.
DROP VIEW pubs_oa; /* EOS */
DROP VIEW datasets_oa; /* EOS */
DROP VIEW software_oa; /* EOS */
DROP VIEW allpubs; /* EOS */
DROP VIEW alldatasets; /* EOS */
DROP VIEW allsoftware; /* EOS */
DROP VIEW allpubsshare; /* EOS */
DROP VIEW alldatasetssshare; /* EOS */
DROP VIEW allsoftwaresshare; /* EOS */
2023-09-01 09:57:02 +02:00
-- RIs Openness
-- Distinct (result id, context name) pairs, reached through
-- result_concepts -> concept -> category -> context. The context name is exposed
-- as ri_initiative.
CREATE TEMPORARY VIEW result_contexts AS
SELECT DISTINCT
    rc.id,
    context.name AS ri_initiative
FROM ${stats_db_name}.result_concepts AS rc
INNER JOIN ${stats_db_name}.concept
    ON concept.id = rc.concept
INNER JOIN ${stats_db_name}.category
    ON category.id = concept.category
INNER JOIN ${stats_db_name}.context
    ON context.id = category.context; /* EOS */
2023-09-01 09:57:02 +02:00
2024-01-26 20:19:52 +01:00
-- Open-access counts per ri_initiative, one view per result type. A result counts
-- when any of its instances has access right 'Open Access', 'Embargo' or
-- 'Open Source'; only years after 2003 are included.

CREATE TEMPORARY VIEW pubs_oa AS
SELECT
    rp.ri_initiative AS ri_initiative,
    COUNT(DISTINCT rp.id) AS no_oapubs
FROM result_contexts AS rp
INNER JOIN ${stats_db_name}.publication AS r
    ON r.id = rp.id
INNER JOIN ${stats_db_name}.result_instance AS ri
    ON ri.id = r.id
WHERE ri.accessright IN ('Open Access', 'Embargo', 'Open Source')
  AND CAST(r.year AS INT) > 2003
GROUP BY rp.ri_initiative; /* EOS */

CREATE TEMPORARY VIEW datasets_oa AS
SELECT
    rp.ri_initiative AS ri_initiative,
    COUNT(DISTINCT rp.id) AS no_oadatasets
FROM result_contexts AS rp
INNER JOIN ${stats_db_name}.dataset AS r
    ON r.id = rp.id
INNER JOIN ${stats_db_name}.result_instance AS ri
    ON ri.id = r.id
WHERE ri.accessright IN ('Open Access', 'Embargo', 'Open Source')
  AND CAST(r.year AS INT) > 2003
GROUP BY rp.ri_initiative; /* EOS */

CREATE TEMPORARY VIEW software_oa AS
SELECT
    rp.ri_initiative AS ri_initiative,
    COUNT(DISTINCT rp.id) AS no_oasoftware
FROM result_contexts AS rp
INNER JOIN ${stats_db_name}.software AS r
    ON r.id = rp.id
INNER JOIN ${stats_db_name}.result_instance AS ri
    ON ri.id = r.id
WHERE ri.accessright IN ('Open Access', 'Embargo', 'Open Source')
  AND CAST(r.year AS INT) > 2003
GROUP BY rp.ri_initiative; /* EOS */
2023-09-01 09:57:02 +02:00
2024-01-26 20:19:52 +01:00
-- Total distinct results per ri_initiative (denominators of the RI openness
-- shares), one view per result type, restricted to years after 2003.

CREATE TEMPORARY VIEW allpubs AS
SELECT
    rp.ri_initiative AS ri_initiative,
    COUNT(DISTINCT rp.id) AS no_allpubs
FROM result_contexts AS rp
INNER JOIN ${stats_db_name}.publication AS r
    ON r.id = rp.id
WHERE CAST(r.year AS INT) > 2003
GROUP BY rp.ri_initiative; /* EOS */

CREATE TEMPORARY VIEW alldatasets AS
SELECT
    rp.ri_initiative AS ri_initiative,
    COUNT(DISTINCT rp.id) AS no_alldatasets
FROM result_contexts AS rp
INNER JOIN ${stats_db_name}.dataset AS r
    ON r.id = rp.id
WHERE CAST(r.year AS INT) > 2003
GROUP BY rp.ri_initiative; /* EOS */

CREATE TEMPORARY VIEW allsoftware AS
SELECT
    rp.ri_initiative AS ri_initiative,
    COUNT(DISTINCT rp.id) AS no_allsoftware
FROM result_contexts AS rp
INNER JOIN ${stats_db_name}.software AS r
    ON r.id = rp.id
WHERE CAST(r.year AS INT) > 2003
GROUP BY rp.ri_initiative; /* EOS */
2023-09-01 09:57:02 +02:00
2024-01-26 20:19:52 +01:00
-- Open-access share per ri_initiative and result type: OA count / total count.
-- Each view keeps only initiatives present in both its OA and its total view.

-- p: share of open-access publications.
CREATE TEMPORARY VIEW allpubsshare AS
SELECT
    oa.ri_initiative,
    oa.no_oapubs / tot.no_allpubs AS p
FROM allpubs AS tot
INNER JOIN pubs_oa AS oa
    ON tot.ri_initiative = oa.ri_initiative; /* EOS */

-- d: share of open-access datasets.
CREATE TEMPORARY VIEW alldatasetssshare AS
SELECT
    oa.ri_initiative,
    oa.no_oadatasets / tot.no_alldatasets AS d
FROM alldatasets AS tot
INNER JOIN datasets_oa AS oa
    ON tot.ri_initiative = oa.ri_initiative; /* EOS */

-- s: share of open-access software.
CREATE TEMPORARY VIEW allsoftwaresshare AS
SELECT
    oa.ri_initiative,
    oa.no_oasoftware / tot.no_allsoftware AS s
FROM allsoftware AS tot
INNER JOIN software_oa AS oa
    ON tot.ri_initiative = oa.ri_initiative; /* EOS */
2023-09-01 09:57:02 +02:00
2024-01-26 20:19:52 +01:00
-- Rebuild the research-initiative openness indicator.
DROP TABLE IF EXISTS ${stats_db_name}.indi_ris_openess PURGE; /* EOS */

-- ris_openess is the mean of the available shares: the publication share p is
-- always present (driving table), while the dataset share d and software share s
-- contribute to numerator and denominator only when the initiative has one.
CREATE TABLE IF NOT EXISTS ${stats_db_name}.indi_ris_openess STORED AS PARQUET AS
SELECT
    allpubsshare.ri_initiative,
    (allpubsshare.p + COALESCE(sw.s, 0) + COALESCE(ds.d, 0))
        / (1
           + (CASE WHEN sw.s IS NULL THEN 0 ELSE 1 END)
           + (CASE WHEN ds.d IS NULL THEN 0 ELSE 1 END)) AS ris_openess
FROM allpubsshare
LEFT JOIN alldatasetssshare AS ds
    ON ds.ri_initiative = allpubsshare.ri_initiative
LEFT JOIN allsoftwaresshare AS sw
    ON sw.ri_initiative = allpubsshare.ri_initiative; /* EOS */

-- Drop every helper view created for this indicator, including result_contexts.
DROP VIEW result_contexts; /* EOS */
DROP VIEW pubs_oa; /* EOS */
DROP VIEW datasets_oa; /* EOS */
DROP VIEW software_oa; /* EOS */
DROP VIEW allpubs; /* EOS */
DROP VIEW alldatasets; /* EOS */
DROP VIEW allsoftware; /* EOS */
DROP VIEW allpubsshare; /* EOS */
DROP VIEW alldatasetssshare; /* EOS */
DROP VIEW allsoftwaresshare; /* EOS */
2023-09-01 09:57:02 +02:00
-- Funder Findability
DROP TABLE IF EXISTS ${stats_db_name}.indi_funder_findable PURGE; /* EOS */

-- funder_findable = results with at least one PID / all results, per funder,
-- over results with year after 2003. Funders with no PID-bearing result are
-- dropped by the inner join.
--
-- Numerator and denominator are both drawn from the result table so the ratio
-- compares like with like; reading the numerator from publication only would
-- understate findability for funders that also have datasets or software.
CREATE TABLE IF NOT EXISTS ${stats_db_name}.indi_funder_findable STORED AS PARQUET AS
WITH result_findable AS (
    SELECT
        p.funder AS funder,
        COUNT(DISTINCT rp.id) AS no_result_findable
    FROM ${stats_db_name}.result_projects AS rp
    INNER JOIN ${stats_db_name}.result AS r
        ON r.id = rp.id
    INNER JOIN ${stats_db_name}.project AS p
        ON p.id = rp.project
    INNER JOIN ${stats_db_name}.result_pids AS rpi
        ON rpi.id = r.id
    WHERE CAST(r.year AS INT) > 2003
    GROUP BY p.funder
),
allresults AS (
    SELECT
        p.funder AS funder,
        COUNT(DISTINCT rp.id) AS no_allresults
    FROM ${stats_db_name}.result_projects AS rp
    INNER JOIN ${stats_db_name}.result AS r
        ON r.id = rp.id
    INNER JOIN ${stats_db_name}.project AS p
        ON p.id = rp.project
    WHERE CAST(r.year AS INT) > 2003
    GROUP BY p.funder
)
SELECT
    allresults.funder,
    result_findable.no_result_findable / allresults.no_allresults AS funder_findable
FROM allresults
INNER JOIN result_findable
    ON result_findable.funder = allresults.funder; /* EOS */
2023-09-01 09:57:02 +02:00
-- RIs Findability
-- One row per research initiative (context name reached through
-- result_concepts -> concept -> category -> context):
--   ris_findable = no_result_findable / no_allresults
-- where both counts are distinct results with year > 2003 and the numerator
-- additionally requires at least one row in result_pids.
-- Initiatives with no PID-bearing result are absent from the output (inner join).
drop table if exists $ { stats_db_name } . indi_ris_findable purge ; /* EOS */

create table if not exists $ { stats_db_name } . indi_ris_findable stored as parquet as
with ri_results as (
    select distinct
        rc.id,
        ctx.name as ri_initiative
    from $ { stats_db_name } . result_concepts rc
    inner join $ { stats_db_name } . concept cpt on cpt.id = rc.concept
    inner join $ { stats_db_name } . category ctg on ctg.id = cpt.category
    inner join $ { stats_db_name } . context ctx on ctx.id = ctg.context
),
findable_per_ri as (
    select
        rr.ri_initiative as ri_initiative,
        count(distinct rr.id) as no_result_findable
    from ri_results rr
    inner join $ { stats_db_name } . result r on r.id = rr.id
    inner join $ { stats_db_name } . result_pids rp on rp.id = r.id
    where cast(r.year as int) > 2003
    group by rr.ri_initiative
),
total_per_ri as (
    select
        rr.ri_initiative as ri_initiative,
        count(distinct rr.id) as no_allresults
    from ri_results rr
    inner join $ { stats_db_name } . result r on r.id = rr.id
    where cast(r.year as int) > 2003
    group by rr.ri_initiative
)
select
    total_per_ri.ri_initiative,
    findable_per_ri.no_result_findable / total_per_ri.no_allresults as ris_findable
from total_per_ri
inner join findable_per_ri on findable_per_ri.ri_initiative = total_per_ri.ri_initiative ; /* EOS */
2023-09-01 09:57:02 +02:00
2023-12-01 14:00:18 +01:00
-- Publicly funded publications
-- One row per publication: publicly_funded is 1 when the publication is linked
-- (via result_organization) to an organization whose name is in
-- publicly_funded_orgs, otherwise 0.
-- publicly_funded_orgs is the distinct union of names from
-- stats_ext.insitutions_for_publicly_funded that
--   1. match a fundref row by name whose type is the government literal below,
--   2. equal the funder of some project, or
--   3. match an organization by name or by pid = ror and are marked with the
--      yes literal in publicly_funded (the organization name is the one kept).
-- The table is dropped first, like every other indicator table in this script,
-- so that create-if-not-exists cannot silently keep stale data on a re-run.
drop table if exists $ { stats_db_name } . indi_pub_publicly_funded purge ; /* EOS */

create table if not exists $ { stats_db_name } . indi_pub_publicly_funded stored as parquet as
with org_names_pids as (
    select org.id, name, pid
    from $ { stats_db_name } . organization org
    join $ { stats_db_name } . organization_pids op on org.id = op.id
),
publicly_funded_orgs as (
    select distinct name
    from (
        select pf.name
        from stats_ext.insitutions_for_publicly_funded pf
        join $ { stats_db_name } . fundref f on f.name = pf.name
        where f.type = ' government '
        union all
        select pf.name
        from stats_ext.insitutions_for_publicly_funded pf
        join $ { stats_db_name } . project p on p.funder = pf.name
        union all
        select op.name
        from stats_ext.insitutions_for_publicly_funded pf
        join org_names_pids op
            on (op.name = pf.name or op.pid = pf.ror)
            and pf.publicly_funded = ' yes '
    ) foo
)
select distinct
    p.id,
    coalesce(publicly_funded, 0) as publicly_funded
from $ { stats_db_name } . publication p
left outer join (
    select distinct ro.id, 1 as publicly_funded
    from $ { stats_db_name } . result_organization ro
    join $ { stats_db_name } . organization o on o.id = ro.organization
    join publicly_funded_orgs pfo on o.name = pfo.name
) tmp on p.id = tmp.id ; /* EOS */
2023-12-22 09:29:20 +01:00
2024-01-29 20:51:47 +01:00
-- Green publications with a license
-- One row per publication: green_with_license is 1 when the publication has an
-- instance with a non-null license hosted by a datasource whose type matches
-- the Repository pattern and whose name differs from the Other literal, else 0.
drop table if exists $ { stats_db_name } . indi_pub_green_with_license purge ; /* EOS */

create table $ { stats_db_name } . indi_pub_green_with_license stored as parquet as
with licensed_in_repository as (
    select distinct
        pub.id,
        1 as green_with_license
    from $ { stats_db_name } . publication pub
    inner join $ { stats_db_name } . result_instance ri on ri.id = pub.id
    inner join $ { stats_db_name } . datasource ds on ds.id = ri.hostedby
    where ds.type like ' %Repository% '
      and ds.name <> ' Other '
      and ri.license is not null
)
select distinct
    p.id,
    coalesce(lir.green_with_license, 0) as green_with_license
from $ { stats_db_name } . publication p
left join licensed_in_repository lir on lir.id = p.id ; /* EOS */
2023-12-22 09:29:20 +01:00
2024-01-29 20:51:47 +01:00
-- Result to country mapping
-- Distinct (id, country) pairs for results linked to an organization. The country
-- is the organization country when present, otherwise the country of the funder
-- of a project the result is linked to. Pairs with no country are left out.
-- The computed column is given the explicit name country, because an unaliased
-- expression gets an engine-generated name in a create-table-as-select.
-- The create statement ends with the EOS marker like every other statement here.
drop table if exists $ { stats_db_name } . result_country purge ; /* EOS */

create table $ { stats_db_name } . result_country stored as parquet as
select distinct
    ro.id,
    coalesce(o.country, f.country) as country
from $ { stats_db_name } . result_organization ro
left outer join $ { stats_db_name } . organization o on o.id = ro.organization
left outer join $ { stats_db_name } . result_projects rp on rp.id = ro.id
left outer join $ { stats_db_name } . project p on p.id = rp.project
left outer join $ { stats_db_name } . funder f on f.name = p.funder
where coalesce(o.country, f.country) is not null ; /* EOS */
2024-03-07 11:15:19 +01:00
-- Open Access results with a license
-- One row per result: oa_with_license is 1 when bestlicence equals the
-- Open Access literal and the result has at least one row in result_licenses,
-- otherwise 0.
drop table if exists $ { stats_db_name } . indi_result_oa_with_license purge ; /* EOS */

create table $ { stats_db_name } . indi_result_oa_with_license stored as parquet as
with oa_licensed as (
    select distinct
        res.id,
        1 as oa_with_license
    from $ { stats_db_name } . result res
    inner join $ { stats_db_name } . result_licenses rl on rl.id = res.id
    where res.bestlicence = ' Open Access '
)
select distinct
    r.id,
    coalesce(oal.oa_with_license, 0) as oa_with_license
from $ { stats_db_name } . result r
left join oa_licensed oal on oal.id = r.id ; /* EOS */
-- Open Access results without a license
-- One row per result: oa_without_license is 1 when bestlicence equals the
-- Open Access literal and the result is flagged 0 in indi_result_oa_with_license,
-- otherwise 0. Reads indi_result_oa_with_license, so that table must be built first.
drop table if exists $ { stats_db_name } . indi_result_oa_without_license purge ; /* EOS */

create table $ { stats_db_name } . indi_result_oa_without_license stored as parquet as
with unlicensed_ids as (
    select distinct id
    from $ { stats_db_name } . indi_result_oa_with_license
    where oa_with_license = 0
),
oa_unlicensed as (
    select distinct
        res.id,
        1 as oa_without_license
    from $ { stats_db_name } . result res
    inner join unlicensed_ids ul on ul.id = res.id
    where res.bestlicence = ' Open Access '
)
select distinct
    r.id,
    coalesce(oau.oa_without_license, 0) as oa_without_license
from $ { stats_db_name } . result r
left join oa_unlicensed oau on oau.id = r.id ; /* EOS */
-- Results under a transformative entry
-- One row per result: under_transformative is 1 when one of the result PIDs
-- equals a doi listed in stats_ext.transformative_facts, otherwise 0.
drop table if exists $ { stats_db_name } . indi_result_under_transformative purge ; /* EOS */

create table $ { stats_db_name } . indi_result_under_transformative stored as parquet as
with transformative_dois as (
    select distinct doi
    from stats_ext.transformative_facts
),
matched_results as (
    select distinct
        rp.id,
        1 as under_transformative
    from $ { stats_db_name } . result_pids rp
    inner join $ { stats_db_name } . result res on res.id = rp.id
    inner join transformative_dois td on td.doi = rp.pid
)
select distinct
    r.id,
    coalesce(mr.under_transformative, 0) as under_transformative
from $ { stats_db_name } . result r
left join matched_results mr on mr.id = r.id ; /* EOS */