resolving conflicts on step16-createIndicatorsTables.sql

This commit is contained in:
Claudio Atzori 2024-03-26 12:17:52 +01:00
parent f6601ea7d1
commit d70793847d
1 changed files with 243 additions and 405 deletions

View File

@ -1,363 +1,229 @@
-- Sprint 1 ----
drop table if exists ${stats_db_name}.indi_pub_green_oa purge; /*EOS*/
--create table if not exists ${stats_db_name}.indi_pub_green_oa stored as parquet as
--select distinct p.id, coalesce(green_oa, 0) as green_oa
--from ${stats_db_name}.publication p
-- left outer join (
-- select p.id, 1 as green_oa
-- from ${stats_db_name}.publication p
-- join ${stats_db_name}.result_instance ri on ri.id = p.id
-- join ${stats_db_name}.datasource on datasource.id = ri.hostedby
-- where datasource.type like '%Repository%'
-- and (ri.accessright = 'Open Access'
-- or ri.accessright = 'Embargo' or ri.accessright = 'Open Source')) tmp
-- on p.id= tmp.id;
create table if not exists ${stats_db_name}.indi_pub_green_oa stored as parquet as
select distinct p.id, coalesce(green_oa, 0) as green_oa
from ${stats_db_name}.publication p
left outer join (
left outer join (
select p.id, 1 as green_oa
from ${stats_db_name}.publication p
join ${stats_db_name}.result_instance ri on ri.id = p.id
join ${stats_db_name}.datasource on datasource.id = ri.hostedby
where datasource.type like '%Repository%'
and (ri.accessright = 'Open Access'
or ri.accessright = 'Embargo' or ri.accessright = 'Open Source') and datasource.name!='Other') tmp
on p.id= tmp.id; /*EOS*/
where datasource.type like '%Repository%' and (ri.accessright = 'Open Access' or ri.accessright = 'Embargo' or ri.accessright = 'Open Source') and datasource.name!='Other') tmp on p.id= tmp.id; /*EOS*/
drop table if exists ${stats_db_name}.indi_pub_grey_lit purge; /*EOS*/
create table if not exists ${stats_db_name}.indi_pub_grey_lit stored as parquet as
select distinct p.id, coalesce(grey_lit, 0) as grey_lit
from ${stats_db_name}.publication p
left outer join (
left outer join (
select p.id, 1 as grey_lit
from ${stats_db_name}.publication p
join ${stats_db_name}.result_classifications rt on rt.id = p.id
where rt.type not in ('Article','Part of book or chapter of book','Book','Doctoral thesis','Master thesis','Data Paper', 'Thesis', 'Bachelor thesis', 'Conference object') and
not exists (select 1 from ${stats_db_name}.result_classifications rc where type ='Other literature type'
and rc.id=p.id)) tmp on p.id=tmp.id; /*EOS*/
where rt.type not in ('Article','Part of book or chapter of book','Book','Doctoral thesis','Master thesis','Data Paper', 'Thesis', 'Bachelor thesis', 'Conference object')
and not exists (select 1 from ${stats_db_name}.result_classifications rc where type ='Other literature type' and rc.id=p.id)) tmp on p.id=tmp.id; /*EOS*/
drop table if exists ${stats_db_name}.indi_pub_doi_from_crossref purge; /*EOS*/
create table if not exists ${stats_db_name}.indi_pub_doi_from_crossref stored as parquet as
select distinct p.id, coalesce(doi_from_crossref, 0) as doi_from_crossref
from ${stats_db_name}.publication p
left outer join
(select ri.id, 1 as doi_from_crossref from ${stats_db_name}.result_instance ri
left outer join (
select ri.id, 1 as doi_from_crossref from ${stats_db_name}.result_instance ri
join ${stats_db_name}.datasource d on d.id = ri.collectedfrom
where pidtype='Digital Object Identifier' and d.name ='Crossref') tmp
on tmp.id=p.id; /*EOS*/
where pidtype='Digital Object Identifier' and d.name ='Crossref') tmp on tmp.id=p.id; /*EOS*/
-- Sprint 2 ----
drop table if exists ${stats_db_name}.indi_result_has_cc_licence purge; /*EOS*/
create table if not exists ${stats_db_name}.indi_result_has_cc_licence stored as parquet as
select distinct r.id, (case when lic='' or lic is null then 0 else 1 end) as has_cc_license
from ${stats_db_name}.result r
left outer join (select r.id, license.type as lic from ${stats_db_name}.result r
left outer join (
select r.id, license.type as lic from ${stats_db_name}.result r
join ${stats_db_name}.result_licenses as license on license.id = r.id
where lower(license.type) LIKE '%creativecommons.org%' OR lower(license.type) LIKE '%cc-%') tmp
on r.id= tmp.id; /*EOS*/
where lower(license.type) LIKE '%creativecommons.org%' OR lower(license.type) LIKE '%cc %') tmp on r.id= tmp.id; /*EOS*/
drop table if exists ${stats_db_name}.indi_result_has_cc_licence_url purge; /*EOS*/
create table if not exists ${stats_db_name}.indi_result_has_cc_licence_url stored as parquet as
select distinct r.id, case when lic_host='' or lic_host is null then 0 else 1 end as has_cc_license_url
from ${stats_db_name}.result r
left outer join (select r.id, lower(parse_url(license.type, "HOST")) as lic_host
left outer join (
select r.id, lower(parse_url(license.type, "HOST")) as lic_host
from ${stats_db_name}.result r
join ${stats_db_name}.result_licenses as license on license.id = r.id
WHERE lower(parse_url(license.type, "HOST")) = "creativecommons.org") tmp
on r.id= tmp.id; /*EOS*/
WHERE lower(parse_url(license.type, "HOST")) = "creativecommons.org") tmp on r.id= tmp.id; /*EOS*/
drop table if exists ${stats_db_name}.indi_pub_has_abstract purge; /*EOS*/
create table if not exists ${stats_db_name}.indi_pub_has_abstract stored as parquet as
select distinct publication.id, cast(coalesce(abstract, true) as int) has_abstract
from ${stats_db_name}.publication; /*EOS*/
drop table if exists ${stats_db_name}.indi_result_with_orcid purge; /*EOS*/
create table if not exists ${stats_db_name}.indi_result_with_orcid stored as parquet as
select distinct r.id, coalesce(has_orcid, 0) as has_orcid
from ${stats_db_name}.result r
left outer join (select id, 1 as has_orcid from ${stats_db_name}.result_orcid) tmp
on r.id= tmp.id; /*EOS*/
left outer join (
select id, 1 as has_orcid from ${stats_db_name}.result_orcid) tmp on r.id= tmp.id; /*EOS*/
---- Sprint 3 ----
drop table if exists ${stats_db_name}.indi_funded_result_with_fundref purge; /*EOS*/
create table if not exists ${stats_db_name}.indi_funded_result_with_fundref stored as parquet as
select distinct r.result as id, coalesce(fundref, 0) as fundref
from ${stats_db_name}.project_results r
left outer join (select distinct result, 1 as fundref from ${stats_db_name}.project_results
where provenance='Harvested') tmp
on r.result= tmp.result; /*EOS*/
-- create table indi_result_org_collab stored as parquet as
-- select o1.organization org1, o2.organization org2, count(distinct o1.id) as collaborations
-- from result_organization as o1
-- join result_organization as o2 on o1.id=o2.id and o1.organization!=o2.organization
-- group by o1.organization, o2.organization;
--
-- compute stats indi_result_org_collab;
--
create TEMPORARY VIEW tmp AS SELECT ro.organization organization, ro.id, o.name from ${stats_db_name}.result_organization ro
join ${stats_db_name}.organization o on o.id=ro.organization where o.name is not null; /*EOS*/
left outer join (
select distinct result, 1 as fundref from ${stats_db_name}.project_results where provenance='Harvested') tmp on r.result= tmp.result; /*EOS*/
drop table if exists ${stats_db_name}.indi_result_org_collab purge; /*EOS*/
create table if not exists ${stats_db_name}.indi_result_org_collab stored as parquet as
select o1.organization org1, o1.name org1name1, o2.organization org2, o2.name org2name2, count(o1.id) as collaborations
from tmp as o1
join tmp as o2 where o1.id=o2.id and o1.organization!=o2.organization and o1.name!=o2.name
group by o1.organization, o2.organization, o1.name, o2.name; /*EOS*/
DROP VIEW if exists tmp; /*EOS*/
create TEMPORARY VIEW tmp AS
select distinct ro.organization organization, ro.id, o.name, o.country from ${stats_db_name}.result_organization ro
join ${stats_db_name}.organization o on o.id=ro.organization where country <> 'UNKNOWN' and o.name is not null; /*EOS*/
WITH tmp AS (
SELECT ro.organization organization, ro.id, o.name
from ${stats_db_name}.result_organization ro
join ${stats_db_name}.organization o on o.id=ro.organization where o.name is not null)
select o1.organization org1, o1.name org1name1, o2.organization org2, o2.name org2name2, count(o1.id) as collaborations
from tmp as o1
join tmp as o2 where o1.id=o2.id and o1.organization!=o2.organization and o1.name!=o2.name
group by o1.organization, o2.organization, o1.name, o2.name; /*EOS*/
drop table if exists ${stats_db_name}.indi_result_org_country_collab purge; /*EOS*/
create table if not exists ${stats_db_name}.indi_result_org_country_collab stored as parquet as
select o1.organization org1,o1.name org1name1, o2.country country2, count(o1.id) as collaborations
from tmp as o1 join tmp as o2 on o1.id=o2.id
where o1.id=o2.id and o1.country!=o2.country
group by o1.organization, o1.id, o1.name, o2.country; /*EOS*/
drop table if exists tmp purge; /*EOS*/
create TEMPORARY VIEW tmp AS
select o.id organization, o.name, ro.project as project from ${stats_db_name}.organization o
join ${stats_db_name}.organization_projects ro on o.id=ro.id where o.name is not null; /*EOS*/
WITH tmp AS (
select distinct ro.organization organization, ro.id, o.name, o.country
from ${stats_db_name}.result_organization ro
join ${stats_db_name}.organization o on o.id=ro.organization
where country <> 'UNKNOWN' and o.name is not null)
select o1.organization org1,o1.name org1name1, o2.country country2, count(o1.id) as collaborations
from tmp as o1 join tmp as o2 on o1.id=o2.id
where o1.id=o2.id and o1.country!=o2.country
group by o1.organization, o1.id, o1.name, o2.country; /*EOS*/
drop table if exists ${stats_db_name}.indi_project_collab_org purge; /*EOS*/
create table if not exists ${stats_db_name}.indi_project_collab_org stored as parquet as
select o1.organization org1,o1.name orgname1, o2.organization org2, o2.name orgname2, count(distinct o1.project) as collaborations
from tmp as o1
WITH tmp AS (
select o.id organization, o.name, ro.project as project
from ${stats_db_name}.organization o
join ${stats_db_name}.organization_projects ro on o.id=ro.id where o.name is not null)
select o1.organization org1,o1.name orgname1, o2.organization org2, o2.name orgname2, count(distinct o1.project) as collaborations
from tmp as o1
join tmp as o2 on o1.project=o2.project
where o1.organization<>o2.organization and o1.name<>o2.name
group by o1.name,o2.name, o1.organization, o2.organization; /*EOS*/
DROP VIEW if exists tmp; /*EOS*/
create TEMPORARY VIEW tmp AS
select o.id organization, o.name, o.country , ro.project as project from ${stats_db_name}.organization o
join ${stats_db_name}.organization_projects ro on o.id=ro.id
and o.country <> 'UNKNOWN' and o.name is not null; /*EOS*/
where o1.organization<>o2.organization and o1.name<>o2.name
group by o1.name,o2.name, o1.organization, o2.organization; /*EOS*/
drop table if exists ${stats_db_name}.indi_project_collab_org_country purge; /*EOS*/
create table if not exists ${stats_db_name}.indi_project_collab_org_country stored as parquet as
select o1.organization org1,o1.name org1name, o2.country country2, count(distinct o1.project) as collaborations
from tmp as o1
WITH tmp AS (
select o.id organization, o.name, o.country , ro.project as project
from ${stats_db_name}.organization o
join ${stats_db_name}.organization_projects ro on o.id=ro.id and o.country <> 'UNKNOWN' and o.name is not null)
select o1.organization org1,o1.name org1name, o2.country country2, count(distinct o1.project) as collaborations
from tmp as o1
join tmp as o2 on o1.project=o2.project
where o1.organization<>o2.organization and o1.country<>o2.country
group by o1.organization, o2.country, o1.name; /*EOS*/
DROP VIEW if exists tmp; /*EOS*/
where o1.organization<>o2.organization and o1.country<>o2.country
group by o1.organization, o2.country, o1.name; /*EOS*/
drop table if exists ${stats_db_name}.indi_funder_country_collab purge; /*EOS*/
create table if not exists ${stats_db_name}.indi_funder_country_collab stored as parquet as
with tmp as (select funder, project, country from ${stats_db_name}.organization_projects op
join ${stats_db_name}.organization o on o.id=op.id
join ${stats_db_name}.project p on p.id=op.project
where country <> 'UNKNOWN')
select f1.funder, f1.country as country1, f2.country as country2, count(distinct f1.project) as collaborations
from tmp as f1
select f1.funder, f1.country as country1, f2.country as country2, count(distinct f1.project) as collaborations
from tmp as f1
join tmp as f2 on f1.project=f2.project
where f1.country<>f2.country
group by f1.funder, f2.country, f1.country; /*EOS*/
create TEMPORARY VIEW tmp AS
select distinct country, ro.id as result from ${stats_db_name}.organization o
join ${stats_db_name}.result_organization ro on o.id=ro.organization
where country <> 'UNKNOWN' and o.name is not null; /*EOS*/
where f1.country<>f2.country
group by f1.funder, f2.country, f1.country; /*EOS*/
drop table if exists ${stats_db_name}.indi_result_country_collab purge; /*EOS*/
create table if not exists ${stats_db_name}.indi_result_country_collab stored as parquet as
select o1.country country1, o2.country country2, count(o1.result) as collaborations
from tmp as o1
WITH tmp AS (
select distinct country, ro.id as result from ${stats_db_name}.organization o
join ${stats_db_name}.result_organization ro on o.id=ro.organization
where country <> 'UNKNOWN' and o.name is not null)
select o1.country country1, o2.country country2, count(o1.result) as collaborations
from tmp as o1
join tmp as o2 on o1.result=o2.result
where o1.country<>o2.country
group by o1.country, o2.country; /*EOS*/
where o1.country<>o2.country
group by o1.country, o2.country; /*EOS*/
DROP VIEW if exists tmp; /*EOS*/
---- Sprint 4 ----
drop table if exists ${stats_db_name}.indi_pub_diamond purge; /*EOS*/
--create table if not exists ${stats_db_name}.indi_pub_diamond stored as parquet as
--select distinct pd.id, coalesce(in_diamond_journal, 0) as in_diamond_journal
--from ${stats_db_name}.publication_datasources pd
-- left outer join (
-- select pd.id, 1 as in_diamond_journal from ${stats_db_name}.publication_datasources pd
-- join ${stats_db_name}.datasource d on d.id=pd.datasource
-- join STATS_EXT.plan_s_jn ps where (ps.issn_print=d.issn_printed and ps.issn_online=d.issn_online)
-- and (ps.journal_is_in_doaj=true or ps.journal_is_oa=true) and ps.has_apc=false) tmp
-- on pd.id=tmp.id;
create table if not exists ${stats_db_name}.indi_pub_diamond stored as parquet as
select distinct pd.id, coalesce(in_diamond_journal, 0) as in_diamond_journal
from ${stats_db_name}.publication_datasources pd
left outer join (select pd.id, 1 as in_diamond_journal from ${stats_db_name}.publication_datasources pd
join ${stats_db_name}.datasource d on d.id=pd.datasource
join STATS_EXT.plan_s_jn ps where (ps.issn_print=d.issn_printed and ps.issn_online=d.issn_online)
and (ps.journal_is_in_doaj=true or ps.journal_is_oa=true) and ps.has_apc=false) tmp
on pd.id=tmp.id; /*EOS*/
drop table if exists ${stats_db_name}.indi_pub_in_transformative purge; /*EOS*/
create table if not exists ${stats_db_name}.indi_pub_in_transformative stored as parquet as
select distinct pd.id, coalesce(is_transformative, 0) as is_transformative
from ${stats_db_name}.publication pd
select distinct pd.id, coalesce(in_diamond_journal, 0) as in_diamond_journal
from ${stats_db_name}.publication_datasources pd
left outer join (
select pd.id, 1 as is_transformative from ${stats_db_name}.publication_datasources pd
select pd.id, 1 as in_diamond_journal
from ${stats_db_name}.publication_datasources pd
join ${stats_db_name}.datasource d on d.id=pd.datasource
join STATS_EXT.plan_s_jn ps where (ps.issn_print=d.issn_printed and ps.issn_online=d.issn_online)
and ps.is_transformative_journal=true) tmp
on pd.id=tmp.id; /*EOS*/
and (ps.journal_is_in_doaj=true or ps.journal_is_oa=true) and ps.has_apc=false) tmp on pd.id=tmp.id; /*EOS*/
drop table if exists ${stats_db_name}.indi_pub_in_transformative purge; /*EOS*/
create table if not exists ${stats_db_name}.indi_pub_in_transformative stored as parquet as
select distinct pd.id, coalesce(is_transformative, 0) as is_transformative
from ${stats_db_name}.publication pd
left outer join (
select pd.id, 1 as is_transformative
from ${stats_db_name}.publication_datasources pd
join ${stats_db_name}.datasource d on d.id=pd.datasource
join STATS_EXT.plan_s_jn ps where (ps.issn_print=d.issn_printed and ps.issn_online=d.issn_online)
and ps.is_transformative_journal=true) tmp on pd.id=tmp.id; /*EOS*/
drop table if exists ${stats_db_name}.indi_pub_closed_other_open purge; /*EOS*/
create table if not exists ${stats_db_name}.indi_pub_closed_other_open stored as parquet as
select distinct ri.id, coalesce(pub_closed_other_open, 0) as pub_closed_other_open from ${stats_db_name}.result_instance ri
left outer join
(select ri.id, 1 as pub_closed_other_open from ${stats_db_name}.result_instance ri
select distinct ri.id, coalesce(pub_closed_other_open, 0) as pub_closed_other_open
from ${stats_db_name}.result_instance ri
left outer join (
select ri.id, 1 as pub_closed_other_open
from ${stats_db_name}.result_instance ri
join ${stats_db_name}.publication p on p.id=ri.id
join ${stats_db_name}.datasource d on ri.hostedby=d.id
where d.type like '%Journal%' and ri.accessright='Closed Access' and
(p.bestlicence='Open Access' or p.bestlicence='Open Source')) tmp
on tmp.id=ri.id; /*EOS*/
(p.bestlicence='Open Access' or p.bestlicence='Open Source')) tmp on tmp.id=ri.id; /*EOS*/
---- Sprint 5 ----
drop table if exists ${stats_db_name}.indi_result_no_of_copies purge; /*EOS*/
create table if not exists ${stats_db_name}.indi_result_no_of_copies stored as parquet as
select id, count(id) as number_of_copies from ${stats_db_name}.result_instance group by id; /*EOS*/
select id, count(id) as number_of_copies
from ${stats_db_name}.result_instance
group by id; /*EOS*/
---- Sprint 6 ----
drop table if exists ${stats_db_name}.indi_pub_downloads purge; /*EOS*/
create table if not exists ${stats_db_name}.indi_pub_downloads stored as parquet as
SELECT result_id, sum(downloads) no_downloads from openaire_prod_usage_stats.usage_stats
SELECT result_id, sum(downloads) no_downloads
from openaire_prod_usage_stats.usage_stats
join ${stats_db_name}.publication on result_id=id
where downloads>0
GROUP BY result_id
order by no_downloads desc; /*EOS*/
--ANALYZE TABLE ${stats_db_name}.indi_pub_downloads COMPUTE STATISTICS;
where downloads>0
GROUP BY result_id; /*EOS*/
drop table if exists ${stats_db_name}.indi_pub_downloads_datasource purge; /*EOS*/
create table if not exists ${stats_db_name}.indi_pub_downloads_datasource stored as parquet as
SELECT result_id, repository_id, sum(downloads) no_downloads from openaire_prod_usage_stats.usage_stats
SELECT result_id, repository_id, sum(downloads) no_downloads
from openaire_prod_usage_stats.usage_stats
join ${stats_db_name}.publication on result_id=id
where downloads>0
GROUP BY result_id, repository_id
order by result_id; /*EOS*/
where downloads>0
GROUP BY result_id, repository_id; /*EOS*/
drop table if exists ${stats_db_name}.indi_pub_downloads_year purge; /*EOS*/
create table if not exists ${stats_db_name}.indi_pub_downloads_year stored as parquet as
SELECT result_id, cast(substring(us.`date`, 1,4) as int) as `year`, sum(downloads) no_downloads
from openaire_prod_usage_stats.usage_stats us
join ${stats_db_name}.publication on result_id=id where downloads>0
GROUP BY result_id, substring(us.`date`, 1,4); /*EOS*/
SELECT result_id, cast(substring(us.`date`, 1,4) as int) as `year`, sum(downloads) no_downloads
from openaire_prod_usage_stats.usage_stats us
join ${stats_db_name}.publication on result_id=id where downloads>0
GROUP BY result_id, substring(us.`date`, 1,4); /*EOS*/
drop table if exists ${stats_db_name}.indi_pub_downloads_datasource_year purge; /*EOS*/
create table if not exists ${stats_db_name}.indi_pub_downloads_datasource_year stored as parquet as
SELECT result_id, cast(substring(us.`date`, 1,4) as int) as `year`, repository_id, sum(downloads) no_downloads from openaire_prod_usage_stats.usage_stats us
join ${stats_db_name}.publication on result_id=id
where downloads>0
GROUP BY result_id, repository_id, substring(us.`date`, 1,4); /*EOS*/
SELECT result_id, cast(substring(us.`date`, 1,4) as int) as `year`, repository_id, sum(downloads) no_downloads
from openaire_prod_usage_stats.usage_stats us
join ${stats_db_name}.publication on result_id=id
where downloads>0
GROUP BY result_id, repository_id, substring(us.`date`, 1,4); /*EOS*/
---- Sprint 7 ----
drop table if exists ${stats_db_name}.indi_pub_gold_oa purge; /*EOS*/
--create table if not exists ${stats_db_name}.indi_pub_gold_oa stored as parquet as
-- WITH gold_oa AS ( SELECT
-- issn_l,
-- journal_is_in_doaj,
-- journal_is_oa,
-- issn_1 as issn
-- FROM
-- STATS_EXT.oa_journals
-- WHERE
-- issn_1 != ""
-- UNION
-- ALL SELECT
-- issn_l,
-- journal_is_in_doaj,
-- journal_is_oa,
-- issn_2 as issn
-- FROM
-- STATS_EXT.oa_journals
-- WHERE
-- issn_2 != "" ), issn AS ( SELECT
-- *
-- FROM
--( SELECT
-- id,
-- issn_printed as issn
-- FROM
-- ${stats_db_name}.datasource
-- WHERE
-- issn_printed IS NOT NULL
-- UNION ALL
-- SELECT
-- id,
-- issn_online as issn
-- FROM
-- ${stats_db_name}.datasource
-- WHERE
-- issn_online IS NOT NULL or id like '%doajarticles%') as issn
-- WHERE
-- LENGTH(issn) > 7)
--SELECT
-- DISTINCT pd.id, coalesce(is_gold, 0) as is_gold
--FROM
-- ${stats_db_name}.publication_datasources pd
-- left outer join(
-- select pd.id, 1 as is_gold FROM ${stats_db_name}.publication_datasources pd
-- JOIN issn on issn.id=pd.datasource
-- JOIN gold_oa on issn.issn = gold_oa.issn) tmp
-- on pd.id=tmp.id;
--create table if not exists ${stats_db_name}.indi_pub_gold_oa stored as parquet as
--with gold_oa as (
--SELECT issn,issn_l from stats_ext.issn_gold_oa_dataset_v5),
--issn AS (SELECT * FROM
--(SELECT id,issn_printed as issn FROM ${stats_db_name}.datasource
--WHERE issn_printed IS NOT NULL
--UNION ALL
--SELECT id, issn_online as issn FROM ${stats_db_name}.datasource
--WHERE issn_online IS NOT NULL or id like '%doajarticles%') as issn
--WHERE LENGTH(issn) > 7),
--alljournals AS(select issn, issn_l from stats_ext.alljournals
--where journal_is_in_doaj=true or journal_is_oa=true)
--SELECT DISTINCT pd.id, coalesce(is_gold, 0) as is_gold
--FROM ${stats_db_name}.publication_datasources pd
--left outer join (
--select pd.id, 1 as is_gold FROM ${stats_db_name}.publication_datasources pd
--JOIN issn on issn.id=pd.datasource
--JOIN gold_oa on issn.issn = gold_oa.issn
--join alljournals on issn.issn=alljournals.issn
--left outer join ${stats_db_name}.result_instance ri on ri.id=pd.id
--and ri.accessright!='Closed Access' and ri.accessright_uw='gold') tmp
--on pd.id=tmp.id;
create table if not exists ${stats_db_name}.indi_pub_gold_oa stored as parquet as
with gold_oa as (
select distinct issn from (
with gold_oa as (
select distinct issn from (
SELECT issn_l as issn from stats_ext.issn_gold_oa_dataset_v5
UNION ALL
SELECT issn as issn from stats_ext.issn_gold_oa_dataset_v5
@ -365,8 +231,8 @@ select distinct issn from (
select issn from stats_ext.alljournals where journal_is_in_doaj=true or journal_is_oa=true
UNION ALL
select issn_l as issn from stats_ext.alljournals where journal_is_in_doaj=true or journal_is_oa=true) foo),
dd as (
select distinct * from (
dd as (
select distinct * from (
select id, issn_printed as issn from ${stats_db_name}.datasource d where d.id like '%doajarticles%'
UNION ALL
select id, issn_online as issn from ${stats_db_name}.datasource d where d.id like '%doajarticles%'
@ -374,17 +240,16 @@ select distinct * from (
select id, issn_printed as issn from ${stats_db_name}.datasource d join gold_oa on gold_oa.issn=d.issn_printed
UNION ALL
select id, issn_online as issn from ${stats_db_name}.datasource d join gold_oa on gold_oa.issn=d.issn_online) foo
)
SELECT DISTINCT pd.id, coalesce(is_gold, 0) as is_gold
FROM ${stats_db_name}.publication_datasources pd
left outer join (
)
SELECT DISTINCT pd.id, coalesce(is_gold, 0) as is_gold
FROM ${stats_db_name}.publication_datasources pd
left outer join (
select pd.id, 1 as is_gold
FROM ${stats_db_name}.publication_datasources pd
join dd on dd.id=pd.datasource
left outer join ${stats_db_name}.result_accessroute ra on ra.id = pd.id where ra.accessroute = 'gold') tmp on tmp.id=pd.id; /*EOS*/
drop table if exists ${stats_db_name}.indi_pub_hybrid_oa_with_cc purge; /*EOS*/
create table if not exists ${stats_db_name}.indi_pub_hybrid_oa_with_cc stored as parquet as
WITH hybrid_oa AS (
SELECT issn_l, journal_is_in_doaj, journal_is_oa, issn_print as issn
@ -405,110 +270,46 @@ create table if not exists ${stats_db_name}.indi_pub_hybrid_oa_with_cc stored as
FROM ${stats_db_name}.datasource
WHERE issn_online IS NOT NULL ) as issn
WHERE LENGTH(issn) > 7)
SELECT DISTINCT pd.id, coalesce(is_hybrid_oa, 0) as is_hybrid_oa
FROM ${stats_db_name}.publication_datasources pd
SELECT DISTINCT pd.id, coalesce(is_hybrid_oa, 0) as is_hybrid_oa
FROM ${stats_db_name}.publication_datasources pd
LEFT OUTER JOIN (
SELECT pd.id, 1 as is_hybrid_oa from ${stats_db_name}.publication_datasources pd
JOIN ${stats_db_name}.datasource d on d.id=pd.datasource
JOIN issn on issn.id=pd.datasource
JOIN hybrid_oa ON issn.issn = hybrid_oa.issn
JOIN ${stats_db_name}.indi_result_has_cc_licence cc on pd.id=cc.id
JOIN ${stats_db_name}.indi_pub_gold_oa ga on pd.id=ga.id
where cc.has_cc_license=1 and ga.is_gold=0) tmp on pd.id=tmp.id; /*EOS*/
JOIN ${stats_db_name}.indi_pub_gold_oa ga on pd.id=ga.id where cc.has_cc_license=1 and ga.is_gold=0) tmp on pd.id=tmp.id; /*EOS*/
drop table if exists ${stats_db_name}.indi_pub_hybrid purge; /*EOS*/
--create table if not exists ${stats_db_name}.indi_pub_hybrid stored as parquet as
-- WITH gold_oa AS ( SELECT
-- issn_l,
-- journal_is_in_doaj,
-- journal_is_oa,
-- issn_1 as issn,
-- has_apc
-- FROM
-- STATS_EXT.oa_journals
-- WHERE
-- issn_1 != ""
-- UNION
-- ALL SELECT
-- issn_l,
-- journal_is_in_doaj,
-- journal_is_oa,
-- issn_2 as issn,
-- has_apc
-- FROM
-- STATS_EXT.oa_journals
-- WHERE
-- issn_2 != "" ), issn AS ( SELECT
-- *
-- FROM
--( SELECT
-- id,
-- issn_printed as issn
-- FROM
-- ${stats_db_name}.datasource
-- WHERE
-- issn_printed IS NOT NULL
-- UNION ALL
-- SELECT
-- id,
-- issn_online as issn
-- FROM
-- ${stats_db_name}.datasource
-- WHERE
-- issn_online IS NOT NULL or id like '%doajarticles%') as issn
-- WHERE
-- LENGTH(issn) > 7)
--select distinct pd.id, coalesce(is_hybrid, 0) as is_hybrid
--from ${stats_db_name}.publication_datasources pd
-- left outer join (
-- select pd.id, 1 as is_hybrid from ${stats_db_name}.publication_datasources pd
-- join ${stats_db_name}.datasource d on d.id=pd.datasource
-- join issn on issn.id=pd.datasource
-- join gold_oa on issn.issn=gold_oa.issn
-- where (gold_oa.journal_is_in_doaj=false or gold_oa.journal_is_oa=false))tmp
-- on pd.id=tmp.id;
--create table if not exists ${stats_db_name}.indi_pub_hybrid stored as parquet as
--select distinct pd.id,coalesce(is_hybrid,0) is_hybrid from ${stats_db_name}.publication_datasources pd
--left outer join (select pd.id, 1 as is_hybrid from ${stats_db_name}.publication_datasources pd
--join ${stats_db_name}.datasource d on pd.datasource=d.id
--join ${stats_db_name}.result_instance ri on ri.id=pd.id
--join ${stats_db_name}.indi_pub_gold_oa indi_gold on indi_gold.id=pd.id
--join ${stats_db_name}.result_accessroute ra on ra.id=pd.id
--where d.type like '%Journal%' and ri.accessright!='Closed Access' and (ri.accessright_uw!='gold'
--or indi_gold.is_gold=0) and (ra.accessroute='hybrid' or ri.license is not null)) tmp
--on pd.id=tmp.id;
create table if not exists ${stats_db_name}.indi_pub_hybrid stored as parquet as
select distinct pd.id,coalesce(is_hybrid,0) is_hybrid from ${stats_db_name}.publication pd
left outer join (select pd.id, 1 as is_hybrid from ${stats_db_name}.publication pd
join ${stats_db_name}.result_instance ri on ri.id=pd.id
join ${stats_db_name}.indi_pub_gold_oa indi_gold on indi_gold.id=pd.id
join ${stats_db_name}.result_accessroute ra on ra.id=pd.id
join ${stats_db_name}.datasource d on d.id=ri.hostedby
where indi_gold.is_gold=0 and ((d.type like '%Journal%' and ri.accessright!='Closed Access' and ri.accessright!='Restricted' and ri.license is not null) or
ra.accessroute='hybrid'))tmp
on pd.id=tmp.id; /*EOS*/
left outer join (
select pd.id, 1 as is_hybrid from ${stats_db_name}.publication pd
join ${stats_db_name}.result_instance ri on ri.id=pd.id
join ${stats_db_name}.indi_pub_gold_oa indi_gold on indi_gold.id=pd.id
join ${stats_db_name}.result_accessroute ra on ra.id=pd.id
join ${stats_db_name}.datasource d on d.id=ri.hostedby
where indi_gold.is_gold=0 and ((d.type like '%Journal%' and ri.accessright!='Closed Access' and ri.accessright!='Restricted' and ri.license is not null) or ra.accessroute='hybrid')) tmp on pd.id=tmp.id; /*EOS*/
drop table if exists ${stats_db_name}.indi_org_fairness purge; /*EOS*/
create table if not exists ${stats_db_name}.indi_org_fairness stored as parquet as
--return results with PIDs, and rich metadata group by organization
with result_fair as
(select ro.organization organization, count(distinct ro.id) no_result_fair from ${stats_db_name}.result_organization ro
with result_fair as (
select ro.organization organization, count(distinct ro.id) no_result_fair
from ${stats_db_name}.result_organization ro
join ${stats_db_name}.result r on r.id=ro.id
--join result_pids rp on r.id=rp.id
where (title is not null) and (publisher is not null) and (abstract=true) and (year is not null) and (authors>0) and cast(year as int)>2003
group by ro.organization),
--return all results group by organization
allresults as (select ro.organization, count(distinct ro.id) no_allresults from ${stats_db_name}.result_organization ro
allresults as (
select ro.organization, count(distinct ro.id) no_allresults from ${stats_db_name}.result_organization ro
join ${stats_db_name}.result r on r.id=ro.id
where cast(year as int)>2003
group by ro.organization)
--return results_fair/all_results
select allresults.organization, result_fair.no_result_fair/allresults.no_allresults org_fairness
from allresults
select allresults.organization, result_fair.no_result_fair/allresults.no_allresults org_fairness
from allresults
join result_fair on result_fair.organization=allresults.organization; /*EOS*/
CREATE TEMPORARY VIEW result_fair as
@ -534,8 +335,8 @@ select ar.organization, rf.no_result_fair/ar.no_allresults org_fairness
from allresults ar
join result_fair rf on rf.organization=ar.organization; /*EOS*/
DROP VIEW result_fair; /*EOS*/
DROP VIEW allresults; /*EOS*/
DROP VIEW result_fair;
DROP VIEW allresults;
CREATE TEMPORARY VIEW result_fair as
select year, ro.organization organization, count(distinct ro.id) no_result_fair from ${stats_db_name}.result_organization ro
@ -859,19 +660,6 @@ drop view pub_fos_totals; /*EOS*/
drop table if exists ${stats_db_name}.indi_pub_bronze_oa purge; /*EOS*/
--create table if not exists ${stats_db_name}.indi_pub_bronze_oa stored as parquet as
--select distinct p.id, coalesce(is_bronze_oa,0) as is_bronze_oa
--from ${stats_db_name}.publication p
--left outer join
--(select p.id, 1 as is_bronze_oa from ${stats_db_name}.publication p
--join ${stats_db_name}.indi_result_has_cc_licence cc on cc.id=p.id
--join ${stats_db_name}.indi_pub_gold_oa ga on ga.id=p.id
--join ${stats_db_name}.result_instance ri on ri.id=p.id
--join ${stats_db_name}.datasource d on d.id=ri.hostedby
--where cc.has_cc_license=0 and ga.is_gold=0
--and (d.type='Journal' or d.type='Journal Aggregator/Publisher')
--and ri.accessright='Open Access') tmp on tmp.id=p.id;
create table ${stats_db_name}.indi_pub_bronze_oa stored as parquet as
select distinct pd.id,coalesce(is_bronze_oa,0) is_bronze_oa from ${stats_db_name}.publication pd
left outer join (select pd.id, 1 as is_bronze_oa from ${stats_db_name}.publication pd
@ -1189,7 +977,7 @@ union all
select pf.name from stats_ext.insitutions_for_publicly_funded pf
join ${stats_db_name}.project p on p.funder=pf.name
union all
select pf.name from stats_ext.insitutions_for_publicly_funded pf
select op.name from stats_ext.insitutions_for_publicly_funded pf
join org_names_pids op on (op.name=pf.name or op.pid=pf.ror)
and pf.publicly_funded='yes') foo)
select distinct p.id, coalesce(publicly_funded, 0) as publicly_funded
@ -1199,3 +987,53 @@ select distinct ro.id, 1 as publicly_funded from ${stats_db_name}.result_organiz
join ${stats_db_name}.organization o on o.id=ro.organization
join publicly_funded_orgs pfo on o.name=pfo.name) tmp on p.id=tmp.id; /*EOS*/
drop table if exists ${stats_db_name}.indi_pub_green_with_license purge; /*EOS*/
create table ${stats_db_name}.indi_pub_green_with_license stored as parquet as
select distinct p.id, coalesce(green_with_license, 0) as green_with_license
from ${stats_db_name}.publication p
left outer join (
select distinct p.id, 1 as green_with_license from ${stats_db_name}.publication p
join ${stats_db_name}.result_instance ri on ri.id = p.id
join ${stats_db_name}.datasource on datasource.id = ri.hostedby
where ri.license is not null and datasource.type like '%Repository%' and datasource.name!='Other') tmp on p.id= tmp.id; /*EOS*/
drop table if exists ${stats_db_name}.result_country purge; /*EOS*/
create table ${stats_db_name}.result_country stored as parquet as
select distinct ro.id, coalesce(o.country, f.country)
from ${stats_db_name}.result_organization ro
left outer join ${stats_db_name}.organization o on o.id=ro.organization
left outer join ${stats_db_name}.result_projects rp on rp.id=ro.id
left outer join ${stats_db_name}.project p on p.id=rp.project
left outer join ${stats_db_name}.funder f on f.name=p.funder
where coalesce(o.country, f.country) IS NOT NULL;
drop table if exists ${stats_db_name}.indi_result_oa_with_license purge; /*EOS*/
create table ${stats_db_name}.indi_result_oa_with_license stored as parquet as
select distinct r.id, coalesce(oa_with_license,0) as oa_with_license
from ${stats_db_name}.result r
left outer join (select distinct r.id, 1 as oa_with_license from ${stats_db_name}.result r
join ${stats_db_name}.result_licenses rl on rl.id=r.id where r.bestlicence='Open Access') tmp on r.id=tmp.id; /*EOS*/
drop table if exists ${stats_db_name}.indi_result_oa_without_license purge; /*EOS*/
create table ${stats_db_name}.indi_result_oa_without_license stored as parquet as
with without_license as
(select distinct id from ${stats_db_name}.indi_result_oa_with_license
where oa_with_license=0)
select distinct r.id, coalesce(oa_without_license,0) as oa_without_license
from ${stats_db_name}.result r
left outer join (select distinct r.id, 1 as oa_without_license
from ${stats_db_name}.result r
join without_license wl on wl.id=r.id
where r.bestlicence='Open Access') tmp on r.id=tmp.id; /*EOS*/
drop table if exists ${stats_db_name}.indi_result_under_transformative purge; /*EOS*/
create table ${stats_db_name}.indi_result_under_transformative stored as parquet as
with transformative_dois as (
select distinct doi from stats_ext.transformative_facts)
select distinct r.id, coalesce(under_transformative,0) as under_transformative
from ${stats_db_name}.result r
left outer join (
select distinct rp.id, 1 as under_transformative
from ${stats_db_name}.result_pids rp join ${stats_db_name}.result r on r.id=rp.id
join transformative_dois td on td.doi=rp.pid) tmp on r.id=tmp.id; /*EOS*/