1
0
Fork 0

Sprint 5 and other changes

This commit is contained in:
dimitrispie 2021-12-20 19:23:57 +02:00
parent 09fc2afdca
commit c1cdec09a9
1 changed files with 83 additions and 61 deletions

View File

@ -1,3 +1,4 @@
---- Sprint 1 ----
create table indi_pub_green_oa stored as parquet as create table indi_pub_green_oa stored as parquet as
select distinct p.id, coalesce(green_oa, 0) as green_oa select distinct p.id, coalesce(green_oa, 0) as green_oa
from publication p from publication p
@ -214,22 +215,23 @@ on p.id= tmp.id;
--select year, type, round(no_of_pubs/total*100,3) averageOfPubs --select year, type, round(no_of_pubs/total*100,3) averageOfPubs
--from total; --from total;
create table indi_pub_has_cc_licence stored as parquet as ---- Sprint 2 ----
select distinct p.id, (case when lic='' or lic is null then 0 else 1 end) as has_cc_license create table indi_result_has_cc_licence_f stored as parquet as
from publication p select distinct r.id, (case when lic='' or lic is null then 0 else 1 end) as has_cc_license
left outer join (select p.id, license.type as lic from publication p from result r
join publication_licenses as license on license.id = p.id left outer join (select r.id, license.type as lic from result r
join result_licenses as license on license.id = r.id
where lower(license.type) LIKE '%creativecommons.org%' OR lower(license.type) LIKE '%cc-%') tmp where lower(license.type) LIKE '%creativecommons.org%' OR lower(license.type) LIKE '%cc-%') tmp
on p.id= tmp.id; on r.id= tmp.id;
create table indi_pub_has_cc_licence_url stored as parquet as create table indi_result_has_cc_licence_url stored as parquet as
select distinct p.id, (case when lic_host='' or lic_host is null then 0 else 1 end) as has_cc_license_url select distinct r.id, case when lic_host='' or lic_host is null then 0 else 1 end as has_cc_license_url
from publication p from result r
left outer join (select p.id, lower(parse_url(license.type, "HOST")) as lic_host left outer join (select r.id, lower(parse_url(license.type, "HOST")) as lic_host
from publication p from result r
join publication_licenses as license on license.id = p.id join result_licenses as license on license.id = r.id
WHERE lower(parse_url(license.type, 'HOST')) = 'creativecommons.org') tmp WHERE lower(parse_url(license.type, "HOST")) = "creativecommons.org") tmp
on p.id= tmp.id; on r.id= tmp.id;
-- EOSC-TR1.1-02M: -- EOSC-TR1.1-02M:
-- ## Indicator: has_cc_license. Creative Commons licensing has become a -- ## Indicator: has_cc_license. Creative Commons licensing has become a
@ -237,7 +239,7 @@ on p.id= tmp.id;
-- like Plan S. This indicator might be only useful when applied -- like Plan S. This indicator might be only useful when applied
-- to openly available publications. -- to openly available publications.
--create table indi_pub_has_cc_licence_tr stored as parquet as --create table indi_pub_has_cc_licence_tr stored as parquet as
--select distinct p.id, case when lic='' or lic is null then 0 else 1 end as has_cc_license_tr --select distinct p.id, case when lic='' or lic is null then 0 else 1 end indi_result_org_collabas has_cc_license_tr
--from publication p --from publication p
--left outer join (select p.id, license.type as lic from publication p --left outer join (select p.id, license.type as lic from publication p
--join publication_licenses as license on license.id = p.id --join publication_licenses as license on license.id = p.id
@ -270,6 +272,8 @@ from result r
left outer join (select id, 1 as has_orcid from result_orcid) tmp left outer join (select id, 1 as has_orcid from result_orcid) tmp
on r.id= tmp.id on r.id= tmp.id
---- Sprint 3 ----
create table indi_funded_result_with_fundref stored as parquet as create table indi_funded_result_with_fundref stored as parquet as
select distinct r.id, coalesce(fundref, 0) as fundref select distinct r.id, coalesce(fundref, 0) as fundref
from project_results r from project_results r
@ -310,6 +314,19 @@ join tmp as f2 on f1.project=f2.project
where f1.country<>f2.country where f1.country<>f2.country
group by f1.funder, f2.country, f1.country group by f1.funder, f2.country, f1.country
create table indi_result_country_collab stored as parquet as
with tmp as
(select country, ro.id as result,r.type from organization o
join result_organization ro on o.id=ro.organization
join result r on r.id=ro.id)
select o1.country country1, o2.country country2, o1.type, count(distinct o1.result) as collaborations
from tmp as o1
join tmp as o2 on o1.result=o2.result
where o1.country<>o2.country
group by o1.country, o2.country, o1.type
---- Sprint 4 ----
create table indi_pub_diamond stored as parquet as create table indi_pub_diamond stored as parquet as
select distinct pd.id, coalesce(in_diamond_journal, 0) as in_diamond_journal select distinct pd.id, coalesce(in_diamond_journal, 0) as in_diamond_journal
from publication_datasources pd from publication_datasources pd
@ -360,3 +377,8 @@ join datasource d on ri.hostedby=d.id
where d.type like '%Journal%' and ri.accessright='Closed Access' and where d.type like '%Journal%' and ri.accessright='Closed Access' and
(p.bestlicence='Open Access' or p.bestlicence='Open Source')) tmp (p.bestlicence='Open Access' or p.bestlicence='Open Source')) tmp
on tmp.id=ri.id on tmp.id=ri.id
---- Sprint 5 ----
create table indi_result_no_of_copies stored as parquet as
select id, count(id) as number_of_copies from result_instance group by id