code cleanup

This commit is contained in:
Antonis Lempesis 2024-03-22 10:16:49 +02:00
parent 459167ac2f
commit 4c40c96e30
1 changed files with 181 additions and 237 deletions

View File

@ -1,18 +1,5 @@
-- Sprint 1 ---- -- Sprint 1 ----
drop table if exists ${stats_db_name}.indi_pub_green_oa purge; /*EOS*/ drop table if exists ${stats_db_name}.indi_pub_green_oa purge; /*EOS*/
--create table if not exists ${stats_db_name}.indi_pub_green_oa stored as parquet as
--select distinct p.id, coalesce(green_oa, 0) as green_oa
--from ${stats_db_name}.publication p
-- left outer join (
-- select p.id, 1 as green_oa
-- from ${stats_db_name}.publication p
-- join ${stats_db_name}.result_instance ri on ri.id = p.id
-- join ${stats_db_name}.datasource on datasource.id = ri.hostedby
-- where datasource.type like '%Repository%'
-- and (ri.accessright = 'Open Access'
-- or ri.accessright = 'Embargo' or ri.accessright = 'Open Source')) tmp
-- on p.id= tmp.id;
create table if not exists ${stats_db_name}.indi_pub_green_oa stored as parquet as create table if not exists ${stats_db_name}.indi_pub_green_oa stored as parquet as
select distinct p.id, coalesce(green_oa, 0) as green_oa select distinct p.id, coalesce(green_oa, 0) as green_oa
from ${stats_db_name}.publication p from ${stats_db_name}.publication p
@ -21,13 +8,9 @@ from ${stats_db_name}.publication p
from ${stats_db_name}.publication p from ${stats_db_name}.publication p
join ${stats_db_name}.result_instance ri on ri.id = p.id join ${stats_db_name}.result_instance ri on ri.id = p.id
join ${stats_db_name}.datasource on datasource.id = ri.hostedby join ${stats_db_name}.datasource on datasource.id = ri.hostedby
where datasource.type like '%Repository%' where datasource.type like '%Repository%' and (ri.accessright = 'Open Access' or ri.accessright = 'Embargo' or ri.accessright = 'Open Source') and datasource.name!='Other') tmp on p.id= tmp.id; /*EOS*/
and (ri.accessright = 'Open Access'
or ri.accessright = 'Embargo' or ri.accessright = 'Open Source') and datasource.name!='Other') tmp
on p.id= tmp.id; /*EOS*/
drop table if exists ${stats_db_name}.indi_pub_grey_lit purge; /*EOS*/ drop table if exists ${stats_db_name}.indi_pub_grey_lit purge; /*EOS*/
create table if not exists ${stats_db_name}.indi_pub_grey_lit stored as parquet as create table if not exists ${stats_db_name}.indi_pub_grey_lit stored as parquet as
select distinct p.id, coalesce(grey_lit, 0) as grey_lit select distinct p.id, coalesce(grey_lit, 0) as grey_lit
from ${stats_db_name}.publication p from ${stats_db_name}.publication p
@ -35,136 +18,107 @@ from ${stats_db_name}.publication p
select p.id, 1 as grey_lit select p.id, 1 as grey_lit
from ${stats_db_name}.publication p from ${stats_db_name}.publication p
join ${stats_db_name}.result_classifications rt on rt.id = p.id join ${stats_db_name}.result_classifications rt on rt.id = p.id
where rt.type not in ('Article','Part of book or chapter of book','Book','Doctoral thesis','Master thesis','Data Paper', 'Thesis', 'Bachelor thesis', 'Conference object') and where rt.type not in ('Article','Part of book or chapter of book','Book','Doctoral thesis','Master thesis','Data Paper', 'Thesis', 'Bachelor thesis', 'Conference object')
not exists (select 1 from ${stats_db_name}.result_classifications rc where type ='Other literature type' and not exists (select 1 from ${stats_db_name}.result_classifications rc where type ='Other literature type' and rc.id=p.id)) tmp on p.id=tmp.id; /*EOS*/
and rc.id=p.id)) tmp on p.id=tmp.id; /*EOS*/
drop table if exists ${stats_db_name}.indi_pub_doi_from_crossref purge; /*EOS*/ drop table if exists ${stats_db_name}.indi_pub_doi_from_crossref purge; /*EOS*/
create table if not exists ${stats_db_name}.indi_pub_doi_from_crossref stored as parquet as create table if not exists ${stats_db_name}.indi_pub_doi_from_crossref stored as parquet as
select distinct p.id, coalesce(doi_from_crossref, 0) as doi_from_crossref select distinct p.id, coalesce(doi_from_crossref, 0) as doi_from_crossref
from ${stats_db_name}.publication p from ${stats_db_name}.publication p
left outer join left outer join (
(select ri.id, 1 as doi_from_crossref from ${stats_db_name}.result_instance ri select ri.id, 1 as doi_from_crossref from ${stats_db_name}.result_instance ri
join ${stats_db_name}.datasource d on d.id = ri.collectedfrom join ${stats_db_name}.datasource d on d.id = ri.collectedfrom
where pidtype='Digital Object Identifier' and d.name ='Crossref') tmp where pidtype='Digital Object Identifier' and d.name ='Crossref') tmp on tmp.id=p.id; /*EOS*/
on tmp.id=p.id; /*EOS*/
-- Sprint 2 ---- -- Sprint 2 ----
drop table if exists ${stats_db_name}.indi_result_has_cc_licence purge; /*EOS*/ drop table if exists ${stats_db_name}.indi_result_has_cc_licence purge; /*EOS*/
create table if not exists ${stats_db_name}.indi_result_has_cc_licence stored as parquet as create table if not exists ${stats_db_name}.indi_result_has_cc_licence stored as parquet as
select distinct r.id, (case when lic='' or lic is null then 0 else 1 end) as has_cc_license select distinct r.id, (case when lic='' or lic is null then 0 else 1 end) as has_cc_license
from ${stats_db_name}.result r from ${stats_db_name}.result r
left outer join (select r.id, license.type as lic from ${stats_db_name}.result r left outer join (
select r.id, license.type as lic from ${stats_db_name}.result r
join ${stats_db_name}.result_licenses as license on license.id = r.id join ${stats_db_name}.result_licenses as license on license.id = r.id
where lower(license.type) LIKE '%creativecommons.org%' OR lower(license.type) LIKE '%cc %') tmp where lower(license.type) LIKE '%creativecommons.org%' OR lower(license.type) LIKE '%cc %') tmp on r.id= tmp.id; /*EOS*/
on r.id= tmp.id; /*EOS*/
drop table if exists ${stats_db_name}.indi_result_has_cc_licence_url purge; /*EOS*/ drop table if exists ${stats_db_name}.indi_result_has_cc_licence_url purge; /*EOS*/
create table if not exists ${stats_db_name}.indi_result_has_cc_licence_url stored as parquet as create table if not exists ${stats_db_name}.indi_result_has_cc_licence_url stored as parquet as
select distinct r.id, case when lic_host='' or lic_host is null then 0 else 1 end as has_cc_license_url select distinct r.id, case when lic_host='' or lic_host is null then 0 else 1 end as has_cc_license_url
from ${stats_db_name}.result r from ${stats_db_name}.result r
left outer join (select r.id, lower(parse_url(license.type, "HOST")) as lic_host left outer join (
select r.id, lower(parse_url(license.type, "HOST")) as lic_host
from ${stats_db_name}.result r from ${stats_db_name}.result r
join ${stats_db_name}.result_licenses as license on license.id = r.id join ${stats_db_name}.result_licenses as license on license.id = r.id
WHERE lower(parse_url(license.type, "HOST")) = "creativecommons.org") tmp WHERE lower(parse_url(license.type, "HOST")) = "creativecommons.org") tmp on r.id= tmp.id; /*EOS*/
on r.id= tmp.id; /*EOS*/
drop table if exists ${stats_db_name}.indi_pub_has_abstract purge; /*EOS*/ drop table if exists ${stats_db_name}.indi_pub_has_abstract purge; /*EOS*/
create table if not exists ${stats_db_name}.indi_pub_has_abstract stored as parquet as create table if not exists ${stats_db_name}.indi_pub_has_abstract stored as parquet as
select distinct publication.id, cast(coalesce(abstract, true) as int) has_abstract select distinct publication.id, cast(coalesce(abstract, true) as int) has_abstract
from ${stats_db_name}.publication; /*EOS*/ from ${stats_db_name}.publication; /*EOS*/
drop table if exists ${stats_db_name}.indi_result_with_orcid purge; /*EOS*/ drop table if exists ${stats_db_name}.indi_result_with_orcid purge; /*EOS*/
create table if not exists ${stats_db_name}.indi_result_with_orcid stored as parquet as create table if not exists ${stats_db_name}.indi_result_with_orcid stored as parquet as
select distinct r.id, coalesce(has_orcid, 0) as has_orcid select distinct r.id, coalesce(has_orcid, 0) as has_orcid
from ${stats_db_name}.result r from ${stats_db_name}.result r
left outer join (select id, 1 as has_orcid from ${stats_db_name}.result_orcid) tmp left outer join (
on r.id= tmp.id; /*EOS*/ select id, 1 as has_orcid from ${stats_db_name}.result_orcid) tmp on r.id= tmp.id; /*EOS*/
---- Sprint 3 ---- ---- Sprint 3 ----
drop table if exists ${stats_db_name}.indi_funded_result_with_fundref purge; /*EOS*/ drop table if exists ${stats_db_name}.indi_funded_result_with_fundref purge; /*EOS*/
create table if not exists ${stats_db_name}.indi_funded_result_with_fundref stored as parquet as create table if not exists ${stats_db_name}.indi_funded_result_with_fundref stored as parquet as
select distinct r.result as id, coalesce(fundref, 0) as fundref select distinct r.result as id, coalesce(fundref, 0) as fundref
from ${stats_db_name}.project_results r from ${stats_db_name}.project_results r
left outer join (select distinct result, 1 as fundref from ${stats_db_name}.project_results left outer join (
where provenance='Harvested') tmp select distinct result, 1 as fundref from ${stats_db_name}.project_results where provenance='Harvested') tmp on r.result= tmp.result; /*EOS*/
on r.result= tmp.result; /*EOS*/
-- create table indi_result_org_collab stored as parquet as
-- select o1.organization org1, o2.organization org2, count(distinct o1.id) as collaborations
-- from result_organization as o1
-- join result_organization as o2 on o1.id=o2.id and o1.organization!=o2.organization
-- group by o1.organization, o2.organization;
--
-- compute stats indi_result_org_collab;
--
create TEMPORARY VIEW tmp AS SELECT ro.organization organization, ro.id, o.name from ${stats_db_name}.result_organization ro
join ${stats_db_name}.organization o on o.id=ro.organization where o.name is not null; /*EOS*/
drop table if exists ${stats_db_name}.indi_result_org_collab purge; /*EOS*/ drop table if exists ${stats_db_name}.indi_result_org_collab purge; /*EOS*/
create table if not exists ${stats_db_name}.indi_result_org_collab stored as parquet as create table if not exists ${stats_db_name}.indi_result_org_collab stored as parquet as
WITH tmp AS (
SELECT ro.organization organization, ro.id, o.name
from ${stats_db_name}.result_organization ro
join ${stats_db_name}.organization o on o.id=ro.organization where o.name is not null)
select o1.organization org1, o1.name org1name1, o2.organization org2, o2.name org2name2, count(o1.id) as collaborations select o1.organization org1, o1.name org1name1, o2.organization org2, o2.name org2name2, count(o1.id) as collaborations
from tmp as o1 from tmp as o1
join tmp as o2 where o1.id=o2.id and o1.organization!=o2.organization and o1.name!=o2.name join tmp as o2 where o1.id=o2.id and o1.organization!=o2.organization and o1.name!=o2.name
group by o1.organization, o2.organization, o1.name, o2.name; /*EOS*/ group by o1.organization, o2.organization, o1.name, o2.name; /*EOS*/
DROP VIEW if exists tmp; /*EOS*/
create TEMPORARY VIEW tmp AS
select distinct ro.organization organization, ro.id, o.name, o.country from ${stats_db_name}.result_organization ro
join ${stats_db_name}.organization o on o.id=ro.organization where country <> 'UNKNOWN' and o.name is not null; /*EOS*/
drop table if exists ${stats_db_name}.indi_result_org_country_collab purge; /*EOS*/ drop table if exists ${stats_db_name}.indi_result_org_country_collab purge; /*EOS*/
create table if not exists ${stats_db_name}.indi_result_org_country_collab stored as parquet as create table if not exists ${stats_db_name}.indi_result_org_country_collab stored as parquet as
WITH tmp AS (
select distinct ro.organization organization, ro.id, o.name, o.country
from ${stats_db_name}.result_organization ro
join ${stats_db_name}.organization o on o.id=ro.organization
where country <> 'UNKNOWN' and o.name is not null)
select o1.organization org1,o1.name org1name1, o2.country country2, count(o1.id) as collaborations select o1.organization org1,o1.name org1name1, o2.country country2, count(o1.id) as collaborations
from tmp as o1 join tmp as o2 on o1.id=o2.id from tmp as o1 join tmp as o2 on o1.id=o2.id
where o1.id=o2.id and o1.country!=o2.country where o1.id=o2.id and o1.country!=o2.country
group by o1.organization, o1.id, o1.name, o2.country; /*EOS*/ group by o1.organization, o1.id, o1.name, o2.country; /*EOS*/
DROP VIEW if exists tmp; /*EOS*/
create TEMPORARY VIEW tmp AS
select o.id organization, o.name, ro.project as project from ${stats_db_name}.organization o
join ${stats_db_name}.organization_projects ro on o.id=ro.id where o.name is not null; /*EOS*/
drop table if exists ${stats_db_name}.indi_project_collab_org purge; /*EOS*/ drop table if exists ${stats_db_name}.indi_project_collab_org purge; /*EOS*/
create table if not exists ${stats_db_name}.indi_project_collab_org stored as parquet as create table if not exists ${stats_db_name}.indi_project_collab_org stored as parquet as
WITH tmp AS (
select o.id organization, o.name, ro.project as project
from ${stats_db_name}.organization o
join ${stats_db_name}.organization_projects ro on o.id=ro.id where o.name is not null)
select o1.organization org1,o1.name orgname1, o2.organization org2, o2.name orgname2, count(distinct o1.project) as collaborations select o1.organization org1,o1.name orgname1, o2.organization org2, o2.name orgname2, count(distinct o1.project) as collaborations
from tmp as o1 from tmp as o1
join tmp as o2 on o1.project=o2.project join tmp as o2 on o1.project=o2.project
where o1.organization<>o2.organization and o1.name<>o2.name where o1.organization<>o2.organization and o1.name<>o2.name
group by o1.name,o2.name, o1.organization, o2.organization; /*EOS*/ group by o1.name,o2.name, o1.organization, o2.organization; /*EOS*/
DROP VIEW if exists tmp; /*EOS*/
create TEMPORARY VIEW tmp AS
select o.id organization, o.name, o.country , ro.project as project from ${stats_db_name}.organization o
join ${stats_db_name}.organization_projects ro on o.id=ro.id
and o.country <> 'UNKNOWN' and o.name is not null; /*EOS*/
drop table if exists ${stats_db_name}.indi_project_collab_org_country purge; /*EOS*/ drop table if exists ${stats_db_name}.indi_project_collab_org_country purge; /*EOS*/
create table if not exists ${stats_db_name}.indi_project_collab_org_country stored as parquet as create table if not exists ${stats_db_name}.indi_project_collab_org_country stored as parquet as
WITH tmp AS (
select o.id organization, o.name, o.country , ro.project as project
from ${stats_db_name}.organization o
join ${stats_db_name}.organization_projects ro on o.id=ro.id and o.country <> 'UNKNOWN' and o.name is not null)
select o1.organization org1,o1.name org1name, o2.country country2, count(distinct o1.project) as collaborations select o1.organization org1,o1.name org1name, o2.country country2, count(distinct o1.project) as collaborations
from tmp as o1 from tmp as o1
join tmp as o2 on o1.project=o2.project join tmp as o2 on o1.project=o2.project
where o1.organization<>o2.organization and o1.country<>o2.country where o1.organization<>o2.organization and o1.country<>o2.country
group by o1.organization, o2.country, o1.name; /*EOS*/ group by o1.organization, o2.country, o1.name; /*EOS*/
DROP VIEW if exists tmp; /*EOS*/
drop table if exists ${stats_db_name}.indi_funder_country_collab purge; /*EOS*/ drop table if exists ${stats_db_name}.indi_funder_country_collab purge; /*EOS*/
create table if not exists ${stats_db_name}.indi_funder_country_collab stored as parquet as create table if not exists ${stats_db_name}.indi_funder_country_collab stored as parquet as
with tmp as (select funder, project, country from ${stats_db_name}.organization_projects op with tmp as (select funder, project, country from ${stats_db_name}.organization_projects op
join ${stats_db_name}.organization o on o.id=op.id join ${stats_db_name}.organization o on o.id=op.id
@ -176,25 +130,21 @@ from tmp as f1
where f1.country<>f2.country where f1.country<>f2.country
group by f1.funder, f2.country, f1.country; /*EOS*/ group by f1.funder, f2.country, f1.country; /*EOS*/
create TEMPORARY VIEW tmp AS drop table if exists ${stats_db_name}.indi_result_country_collab purge; /*EOS*/
create table if not exists ${stats_db_name}.indi_result_country_collab stored as parquet as
WITH tmp AS (
select distinct country, ro.id as result from ${stats_db_name}.organization o select distinct country, ro.id as result from ${stats_db_name}.organization o
join ${stats_db_name}.result_organization ro on o.id=ro.organization join ${stats_db_name}.result_organization ro on o.id=ro.organization
where country <> 'UNKNOWN' and o.name is not null; /*EOS*/ where country <> 'UNKNOWN' and o.name is not null)
drop table if exists ${stats_db_name}.indi_result_country_collab purge; /*EOS*/
create table if not exists ${stats_db_name}.indi_result_country_collab stored as parquet as
select o1.country country1, o2.country country2, count(o1.result) as collaborations select o1.country country1, o2.country country2, count(o1.result) as collaborations
from tmp as o1 from tmp as o1
join tmp as o2 on o1.result=o2.result join tmp as o2 on o1.result=o2.result
where o1.country<>o2.country where o1.country<>o2.country
group by o1.country, o2.country; /*EOS*/ group by o1.country, o2.country; /*EOS*/
DROP VIEW if exists tmp; /*EOS*/
---- Sprint 4 ---- ---- Sprint 4 ----
drop table if exists ${stats_db_name}.indi_pub_diamond purge; /*EOS*/ drop table if exists ${stats_db_name}.indi_pub_diamond purge; /*EOS*/
create table if not exists ${stats_db_name}.indi_pub_diamond stored as parquet as create table if not exists ${stats_db_name}.indi_pub_diamond stored as parquet as
select distinct pd.id, coalesce(in_diamond_journal, 0) as in_diamond_journal select distinct pd.id, coalesce(in_diamond_journal, 0) as in_diamond_journal
from ${stats_db_name}.publication_datasources pd from ${stats_db_name}.publication_datasources pd
@ -206,7 +156,6 @@ left outer join (
and (ps.journal_is_in_doaj=true or ps.journal_is_oa=true) and ps.has_apc=false) tmp on pd.id=tmp.id; /*EOS*/ and (ps.journal_is_in_doaj=true or ps.journal_is_oa=true) and ps.has_apc=false) tmp on pd.id=tmp.id; /*EOS*/
drop table if exists ${stats_db_name}.indi_pub_in_transformative purge; /*EOS*/ drop table if exists ${stats_db_name}.indi_pub_in_transformative purge; /*EOS*/
create table if not exists ${stats_db_name}.indi_pub_in_transformative stored as parquet as create table if not exists ${stats_db_name}.indi_pub_in_transformative stored as parquet as
select distinct pd.id, coalesce(is_transformative, 0) as is_transformative select distinct pd.id, coalesce(is_transformative, 0) as is_transformative
from ${stats_db_name}.publication pd from ${stats_db_name}.publication pd
@ -218,7 +167,6 @@ left outer join (
and ps.is_transformative_journal=true) tmp on pd.id=tmp.id; /*EOS*/ and ps.is_transformative_journal=true) tmp on pd.id=tmp.id; /*EOS*/
drop table if exists ${stats_db_name}.indi_pub_closed_other_open purge; /*EOS*/ drop table if exists ${stats_db_name}.indi_pub_closed_other_open purge; /*EOS*/
create table if not exists ${stats_db_name}.indi_pub_closed_other_open stored as parquet as create table if not exists ${stats_db_name}.indi_pub_closed_other_open stored as parquet as
select distinct ri.id, coalesce(pub_closed_other_open, 0) as pub_closed_other_open select distinct ri.id, coalesce(pub_closed_other_open, 0) as pub_closed_other_open
from ${stats_db_name}.result_instance ri from ${stats_db_name}.result_instance ri
@ -230,35 +178,32 @@ left outer join (
where d.type like '%Journal%' and ri.accessright='Closed Access' and where d.type like '%Journal%' and ri.accessright='Closed Access' and
(p.bestlicence='Open Access' or p.bestlicence='Open Source')) tmp on tmp.id=ri.id; /*EOS*/ (p.bestlicence='Open Access' or p.bestlicence='Open Source')) tmp on tmp.id=ri.id; /*EOS*/
---- Sprint 5 ---- ---- Sprint 5 ----
drop table if exists ${stats_db_name}.indi_result_no_of_copies purge; /*EOS*/ drop table if exists ${stats_db_name}.indi_result_no_of_copies purge; /*EOS*/
create table if not exists ${stats_db_name}.indi_result_no_of_copies stored as parquet as create table if not exists ${stats_db_name}.indi_result_no_of_copies stored as parquet as
select id, count(id) as number_of_copies from ${stats_db_name}.result_instance group by id; /*EOS*/ select id, count(id) as number_of_copies
from ${stats_db_name}.result_instance
group by id; /*EOS*/
---- Sprint 6 ---- ---- Sprint 6 ----
drop table if exists ${stats_db_name}.indi_pub_downloads purge; /*EOS*/ drop table if exists ${stats_db_name}.indi_pub_downloads purge; /*EOS*/
create table if not exists ${stats_db_name}.indi_pub_downloads stored as parquet as create table if not exists ${stats_db_name}.indi_pub_downloads stored as parquet as
SELECT result_id, sum(downloads) no_downloads from openaire_prod_usage_stats.usage_stats SELECT result_id, sum(downloads) no_downloads
from openaire_prod_usage_stats.usage_stats
join ${stats_db_name}.publication on result_id=id join ${stats_db_name}.publication on result_id=id
where downloads>0 where downloads>0
GROUP BY result_id GROUP BY result_id; /*EOS*/
order by no_downloads desc; /*EOS*/
--ANALYZE TABLE ${stats_db_name}.indi_pub_downloads COMPUTE STATISTICS;
drop table if exists ${stats_db_name}.indi_pub_downloads_datasource purge; /*EOS*/ drop table if exists ${stats_db_name}.indi_pub_downloads_datasource purge; /*EOS*/
create table if not exists ${stats_db_name}.indi_pub_downloads_datasource stored as parquet as create table if not exists ${stats_db_name}.indi_pub_downloads_datasource stored as parquet as
SELECT result_id, repository_id, sum(downloads) no_downloads from openaire_prod_usage_stats.usage_stats SELECT result_id, repository_id, sum(downloads) no_downloads
from openaire_prod_usage_stats.usage_stats
join ${stats_db_name}.publication on result_id=id join ${stats_db_name}.publication on result_id=id
where downloads>0 where downloads>0
GROUP BY result_id, repository_id GROUP BY result_id, repository_id; /*EOS*/
order by result_id; /*EOS*/
drop table if exists ${stats_db_name}.indi_pub_downloads_year purge; /*EOS*/ drop table if exists ${stats_db_name}.indi_pub_downloads_year purge; /*EOS*/
create table if not exists ${stats_db_name}.indi_pub_downloads_year stored as parquet as create table if not exists ${stats_db_name}.indi_pub_downloads_year stored as parquet as
SELECT result_id, cast(substring(us.`date`, 1,4) as int) as `year`, sum(downloads) no_downloads SELECT result_id, cast(substring(us.`date`, 1,4) as int) as `year`, sum(downloads) no_downloads
from openaire_prod_usage_stats.usage_stats us from openaire_prod_usage_stats.usage_stats us
@ -266,16 +211,16 @@ join ${stats_db_name}.publication on result_id=id where downloads>0
GROUP BY result_id, substring(us.`date`, 1,4); /*EOS*/ GROUP BY result_id, substring(us.`date`, 1,4); /*EOS*/
drop table if exists ${stats_db_name}.indi_pub_downloads_datasource_year purge; /*EOS*/ drop table if exists ${stats_db_name}.indi_pub_downloads_datasource_year purge; /*EOS*/
create table if not exists ${stats_db_name}.indi_pub_downloads_datasource_year stored as parquet as create table if not exists ${stats_db_name}.indi_pub_downloads_datasource_year stored as parquet as
SELECT result_id, cast(substring(us.`date`, 1,4) as int) as `year`, repository_id, sum(downloads) no_downloads from openaire_prod_usage_stats.usage_stats us SELECT result_id, cast(substring(us.`date`, 1,4) as int) as `year`, repository_id, sum(downloads) no_downloads
from openaire_prod_usage_stats.usage_stats us
join ${stats_db_name}.publication on result_id=id join ${stats_db_name}.publication on result_id=id
where downloads>0 where downloads>0
GROUP BY result_id, repository_id, substring(us.`date`, 1,4); /*EOS*/ GROUP BY result_id, repository_id, substring(us.`date`, 1,4); /*EOS*/
---- Sprint 7 ---- ---- Sprint 7 ----
drop table if exists ${stats_db_name}.indi_pub_gold_oa purge; /*EOS*/ drop table if exists ${stats_db_name}.indi_pub_gold_oa purge; /*EOS*/
create table if not exists ${stats_db_name}.indi_pub_gold_oa stored as parquet as create table if not exists ${stats_db_name}.indi_pub_gold_oa stored as parquet as
with gold_oa as ( with gold_oa as (
select distinct issn from ( select distinct issn from (
@ -305,7 +250,6 @@ left outer join (
left outer join ${stats_db_name}.result_accessroute ra on ra.id = pd.id where ra.accessroute = 'gold') tmp on tmp.id=pd.id; /*EOS*/ left outer join ${stats_db_name}.result_accessroute ra on ra.id = pd.id where ra.accessroute = 'gold') tmp on tmp.id=pd.id; /*EOS*/
drop table if exists ${stats_db_name}.indi_pub_hybrid_oa_with_cc purge; /*EOS*/ drop table if exists ${stats_db_name}.indi_pub_hybrid_oa_with_cc purge; /*EOS*/
create table if not exists ${stats_db_name}.indi_pub_hybrid_oa_with_cc stored as parquet as create table if not exists ${stats_db_name}.indi_pub_hybrid_oa_with_cc stored as parquet as
WITH hybrid_oa AS ( WITH hybrid_oa AS (
SELECT issn_l, journal_is_in_doaj, journal_is_oa, issn_print as issn SELECT issn_l, journal_is_in_doaj, journal_is_oa, issn_print as issn
@ -337,7 +281,6 @@ LEFT OUTER JOIN (
JOIN ${stats_db_name}.indi_pub_gold_oa ga on pd.id=ga.id where cc.has_cc_license=1 and ga.is_gold=0) tmp on pd.id=tmp.id; /*EOS*/ JOIN ${stats_db_name}.indi_pub_gold_oa ga on pd.id=ga.id where cc.has_cc_license=1 and ga.is_gold=0) tmp on pd.id=tmp.id; /*EOS*/
drop table if exists ${stats_db_name}.indi_pub_hybrid purge; /*EOS*/ drop table if exists ${stats_db_name}.indi_pub_hybrid purge; /*EOS*/
create table if not exists ${stats_db_name}.indi_pub_hybrid stored as parquet as create table if not exists ${stats_db_name}.indi_pub_hybrid stored as parquet as
select distinct pd.id,coalesce(is_hybrid,0) is_hybrid from ${stats_db_name}.publication pd select distinct pd.id,coalesce(is_hybrid,0) is_hybrid from ${stats_db_name}.publication pd
left outer join ( left outer join (
@ -349,17 +292,18 @@ left outer join (
where indi_gold.is_gold=0 and ((d.type like '%Journal%' and ri.accessright!='Closed Access' and ri.accessright!='Restricted' and ri.license is not null) or ra.accessroute='hybrid')) tmp on pd.id=tmp.id; /*EOS*/ where indi_gold.is_gold=0 and ((d.type like '%Journal%' and ri.accessright!='Closed Access' and ri.accessright!='Restricted' and ri.license is not null) or ra.accessroute='hybrid')) tmp on pd.id=tmp.id; /*EOS*/
drop table if exists ${stats_db_name}.indi_org_fairness purge; /*EOS*/ drop table if exists ${stats_db_name}.indi_org_fairness purge; /*EOS*/
create table if not exists ${stats_db_name}.indi_org_fairness stored as parquet as create table if not exists ${stats_db_name}.indi_org_fairness stored as parquet as
--return results with PIDs, and rich metadata group by organization --return results with PIDs, and rich metadata group by organization
with result_fair as with result_fair as (
(select ro.organization organization, count(distinct ro.id) no_result_fair from ${stats_db_name}.result_organization ro select ro.organization organization, count(distinct ro.id) no_result_fair
from ${stats_db_name}.result_organization ro
join ${stats_db_name}.result r on r.id=ro.id join ${stats_db_name}.result r on r.id=ro.id
--join result_pids rp on r.id=rp.id --join result_pids rp on r.id=rp.id
where (title is not null) and (publisher is not null) and (abstract=true) and (year is not null) and (authors>0) and cast(year as int)>2003 where (title is not null) and (publisher is not null) and (abstract=true) and (year is not null) and (authors>0) and cast(year as int)>2003
group by ro.organization), group by ro.organization),
--return all results group by organization --return all results group by organization
allresults as (select ro.organization, count(distinct ro.id) no_allresults from ${stats_db_name}.result_organization ro allresults as (
select ro.organization, count(distinct ro.id) no_allresults from ${stats_db_name}.result_organization ro
join ${stats_db_name}.result r on r.id=ro.id join ${stats_db_name}.result r on r.id=ro.id
where cast(year as int)>2003 where cast(year as int)>2003
group by ro.organization) group by ro.organization)