1
0
Fork 0

Sprint 5 and other changes

This commit is contained in:
dimitrispie 2021-12-20 19:23:57 +02:00
parent 09fc2afdca
commit c1cdec09a9
1 changed files with 83 additions and 61 deletions

View File

@ -1,28 +1,29 @@
---- Sprint 1 ----
create table indi_pub_green_oa stored as parquet as
select distinct p.id, coalesce(green_oa, 0) as green_oa
select distinct p.id, coalesce(green_oa, 0) as green_oa
from publication p
left outer join (
select p.id, 1 as green_oa
left outer join (
select p.id, 1 as green_oa
from publication p
join result_instance ri on ri.id = p.id
join datasource on datasource.id = ri.hostedby
where datasource.type like '%Repository%'
and (ri.accessright = 'Open Access'
or ri.accessright = 'Embargo' or ri.accessright = 'Open Source')) tmp
and (ri.accessright = 'Open Access'
or ri.accessright = 'Embargo' or ri.accessright = 'Open Source')) tmp
on p.id= tmp.id;
create table indi_pub_grey_lit stored as parquet as
select distinct p.id, coalesce(grey_lit, 0) as grey_lit
from publication p
left outer join (
select p.id, 1 as grey_lit
select p.id, 1 as grey_lit
from publication p
join result_classifications rt on rt.id = p.id
where rt.type not in ('Article','Part of book or chapter of book','Book','Doctoral thesis','Master thesis','Data Paper', 'Thesis', 'Bachelor thesis', 'Conference object') and
where rt.type not in ('Article','Part of book or chapter of book','Book','Doctoral thesis','Master thesis','Data Paper', 'Thesis', 'Bachelor thesis', 'Conference object') and
not exists (select 1 from result_classifications rc where type ='Other literature type' and rc.id=p.id)) tmp on p.id=tmp.id;
create table indi_pub_doi_from_crossref stored as parquet as
select distinct p.id, coalesce(doi_from_crossref, 0) as doi_from_crossref
select distinct p.id, coalesce(doi_from_crossref, 0) as doi_from_crossref
from publication p
left outer join
(select ri.id, 1 as doi_from_crossref from result_instance ri
@ -33,7 +34,7 @@ on tmp.id=p.id;
create table indi_pub_gold_oa stored as parquet as
select distinct p.id, coalesce(gold_oa, 0) as gold_oa
from publication p
left outer join (
left outer join (
select p.id, 1 as gold_oa
from publication p
join result_instance ri on ri.id = p.id
@ -214,82 +215,85 @@ on p.id= tmp.id;
--select year, type, round(no_of_pubs/total*100,3) averageOfPubs
--from total;
create table indi_pub_has_cc_licence stored as parquet as
select distinct p.id, (case when lic='' or lic is null then 0 else 1 end) as has_cc_license
from publication p
left outer join (select p.id, license.type as lic from publication p
join publication_licenses as license on license.id = p.id
---- Sprint 2 ----
create table indi_result_has_cc_licence_f stored as parquet as
select distinct r.id, (case when lic='' or lic is null then 0 else 1 end) as has_cc_license
from result r
left outer join (select r.id, license.type as lic from result r
join result_licenses as license on license.id = r.id
where lower(license.type) LIKE '%creativecommons.org%' OR lower(license.type) LIKE '%cc-%') tmp
on p.id= tmp.id;
on r.id= tmp.id;
create table indi_pub_has_cc_licence_url stored as parquet as
select distinct p.id, (case when lic_host='' or lic_host is null then 0 else 1 end) as has_cc_license_url
from publication p
left outer join (select p.id, lower(parse_url(license.type, "HOST")) as lic_host
from publication p
join publication_licenses as license on license.id = p.id
WHERE lower(parse_url(license.type, 'HOST')) = 'creativecommons.org') tmp
on p.id= tmp.id;
create table indi_result_has_cc_licence_url stored as parquet as
select distinct r.id, case when lic_host='' or lic_host is null then 0 else 1 end as has_cc_license_url
from result r
left outer join (select r.id, lower(parse_url(license.type, "HOST")) as lic_host
from result r
join result_licenses as license on license.id = r.id
WHERE lower(parse_url(license.type, "HOST")) = "creativecommons.org") tmp
on r.id= tmp.id;
-- EOSC-TR1.1-02M:
-- ## Indicator: has_cc_license. Creative Commons licensing has become a
-- de facto standard in scholarly communication and is promoted by many initiatives
-- like Plan S. This indicator might be only useful when applied
-- like Plan S. This indicator might be only useful when applied
-- to openly available publications.
--create table indi_pub_has_cc_licence_tr stored as parquet as
--select distinct p.id, case when lic='' or lic is null then 0 else 1 end as has_cc_license_tr
--from publication p
--create table indi_pub_has_cc_licence_tr stored as parquet as
--select distinct p.id, case when lic='' or lic is null then 0 else 1 end indi_result_org_collabas has_cc_license_tr
--from publication p
--left outer join (select p.id, license.type as lic from publication p
--join publication_licenses as license on license.id = p.id
--where lower(license.type) LIKE '%creativecommons.org%' OR lower(license.type) LIKE '%cc-%') tmp
--join publication_licenses as license on license.id = p.id
--where lower(license.type) LIKE '%creativecommons.org%' OR lower(license.type) LIKE '%cc-%') tmp
--on p.id= tmp.id
-- #EOSC-F2-01M_cc Rich metadata for scholarly publications
-- ## Indicator: has_cc_license. Creative Commons licensing has become a
-- de facto standard in scholarly communication and is promoted by many initiatives
-- like Plan S. This indicator might be only useful when applied
-- like Plan S. This indicator might be only useful when applied
-- to openly available publications.
-- Same indicator as EOSC-TR1.1-02M (Najko's instructions)
-- create table indi_pub_has_cc_licence_f stored as parquet as
-- select
-- distinct p.id, case when lic='' or lic is null then 0 else 1 end as has_cc_license_f
-- from publication p
-- left outer join (selectp.id,license.type as lic from publication p
-- join publication_licenses as license on license.id = p.id
-- where lower(license.type) LIKE '%creativecommons.org%' OR lower(license.type) LIKE '%cc-%') tmp
-- from publication p
-- left outer join (selectp.id,license.type as lic from publication p
-- join publication_licenses as license on license.id = p.id
-- where lower(license.type) LIKE '%creativecommons.org%' OR lower(license.type) LIKE '%cc-%') tmp
-- on p.id= tmp.id
create table indi_pub_has_abstract stored as parquet as
select distinct publication.id, coalesce(abstract, 1) has_abstract
from publication;
create table indi_result_with_orcid stored as parquet as
create table indi_result_with_orcid stored as parquet as
select distinct r.id, coalesce(has_orcid, 0) as has_orcid
from result r
left outer join (select id, 1 as has_orcid from result_orcid) tmp
on r.id= tmp.id
create table indi_funded_result_with_fundref stored as parquet as
select distinct r.id, coalesce(fundref, 0) as fundref
from project_results r
left outer join (select distinct id, 1 as fundref from project_results
where provenance='Harvested') tmp
from result r
left outer join (select id, 1 as has_orcid from result_orcid) tmp
on r.id= tmp.id
create table indi_result_org_country_collab stored as parquet as
with tmp as
---- Sprint 3 ----
create table indi_funded_result_with_fundref stored as parquet as
select distinct r.id, coalesce(fundref, 0) as fundref
from project_results r
left outer join (select distinct id, 1 as fundref from project_results
where provenance='Harvested') tmp
on r.id= tmp.id
create table indi_result_org_country_collab stored as parquet as
with tmp as
(select o.id as id, o.country , ro.id as result,r.type from organization o
join result_organization ro on o.id=ro.organization
join result r on r.id=ro.id where o.country <> 'UNKNOWN')
select o1.id org1,o2.country country2, o1.type, count(distinct o1.result) as collaborations
from tmp as o1
join tmp as o2 on o1.result=o2.result
where o1.id<>o2.id and o1.country<>o2.country
where o1.id<>o2.id and o1.country<>o2.country
group by o1.id, o1.type,o2.country
create table indi_result_org_collab stored as parquet as
with tmp as
create table indi_result_org_collab stored as parquet as
with tmp as
(select o.id, ro.id as result,r.type from organization o
join result_organization ro on o.id=ro.organization
join result r on r.id=ro.id)
@ -299,19 +303,32 @@ join tmp as o2 on o1.result=o2.result
where o1.id<>o2.id
group by o1.id, o2.id, o1.type
create table indi_funder_country_collab stored as parquet as
with tmp as (select funder, project, country from organization_projects op
join organization o on o.id=op.id
join project p on p.id=op.project
create table indi_funder_country_collab stored as parquet as
with tmp as (select funder, project, country from organization_projects op
join organization o on o.id=op.id
join project p on p.id=op.project
where country <> 'UNKNOWN')
select f1.funder, f1.country, f2.country, count(distinct f1.project) as collaborations
from tmp as f1
join tmp as f2 on f1.project=f2.project
where f1.country<>f2.country
where f1.country<>f2.country
group by f1.funder, f2.country, f1.country
create table indi_result_country_collab stored as parquet as
with tmp as
(select country, ro.id as result,r.type from organization o
join result_organization ro on o.id=ro.organization
join result r on r.id=ro.id)
select o1.country country1, o2.country country2, o1.type, count(distinct o1.result) as collaborations
from tmp as o1
join tmp as o2 on o1.result=o2.result
where o1.country<>o2.country
group by o1.country, o2.country, o1.type
---- Sprint 4 ----
create table indi_pub_diamond stored as parquet as
select distinct pd.id, coalesce(in_diamond_journal, 0) as in_diamond_journal
select distinct pd.id, coalesce(in_diamond_journal, 0) as in_diamond_journal
from publication_datasources pd
left outer join (
select pd.id, 1 as in_diamond_journal from publication_datasources pd
@ -321,7 +338,7 @@ and (ps.journal_is_in_doaj=true or ps.journal_is_oa=true) and ps.has_apc=false)
on pd.id=tmp.id
create table indi_pub_hybrid stored as parquet as
select distinct pd.id, coalesce(is_hybrid, 0) as is_hybrid
select distinct pd.id, coalesce(is_hybrid, 0) as is_hybrid
from publication_datasources pd
left outer join (
select pd.id, 1 as is_hybrid from publication_datasources pd
@ -331,7 +348,7 @@ and (ps.journal_is_in_doaj=false and ps.journal_is_oa=false)) tmp
on pd.id=tmp.id
create table indi_is_gold_oa stored as parquet as
(select distinct pd.id, coalesce(gold_oa, 0) as gold_oa
(select distinct pd.id, coalesce(gold_oa, 0) as gold_oa
from publication_datasources pd
left outer join (
select pd.id, 1 as gold_oa from publication_datasources pd
@ -342,7 +359,7 @@ on pd.id=tmp.id)
create table indi_pub_in_transformative stored as parquet as
select distinct pd.id, coalesce(is_transformative, 0) as is_transformative
select distinct pd.id, coalesce(is_transformative, 0) as is_transformative
from publication pd
left outer join (
select pd.id, 1 as is_transformative from publication_datasources pd
@ -353,10 +370,15 @@ on pd.id=tmp.id
create table indi_pub_closed_other_open stored as parquet as
select distinct ri.id, coalesce(pub_closed_other_open, 0) as pub_closed_other_open from result_instance ri
left outer join
left outer join
(select ri.id, 1 as pub_closed_other_open from result_instance ri
join publication p on p.id=ri.id
join datasource d on ri.hostedby=d.id
where d.type like '%Journal%' and ri.accessright='Closed Access' and
(p.bestlicence='Open Access' or p.bestlicence='Open Source')) tmp
on tmp.id=ri.id
(p.bestlicence='Open Access' or p.bestlicence='Open Source')) tmp
on tmp.id=ri.id
---- Sprint 5 ----
create table indi_result_no_of_copies stored as parquet as
select id, count(id) as number_of_copies from result_instance group by id