[stats] updates in the mapping, indicators, wf #145

Merged
claudio.atzori merged 14 commits from antonis.lempesis/dnet-hadoop:beta into beta 2021-09-27 15:59:55 +02:00
1 changed files with 131 additions and 36 deletions
Showing only changes of commit dd2329849f - Show all commits

View File

@ -1,44 +1,61 @@
create table TARGET.result_affiliated_country stored as parquet as create table TARGET.result_affiliated_country stored as parquet as
select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
r.peer_reviewed, case when lower(rln.normalized) like 'cc-%' then true else false end as cc_licence, r.abstract as abstract, r.type, c.code as ccode, c.name as cname r.peer_reviewed, rln.count > 0 as cc_licence, r.abstract as abstract, r.type, c.code as ccode, c.name as cname
from SOURCE.result r from SOURCE.result r
join SOURCE.result_organization ro on ro.id=r.id join SOURCE.result_organization ro on ro.id=r.id
join SOURCE.organization o on o.id=ro.organization join SOURCE.organization o on o.id=ro.organization
join SOURCE.country c on c.code=o.country and c.continent_name='Europe' join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
left outer join SOURCE.result_licenses rl on rl.id=r.id left outer join SOURCE.result_licenses rl on rl.id=r.id
left outer join SOURCE.result_pids pids on pids.id=r.id left outer join SOURCE.result_pids pids on pids.id=r.id
left outer join SOURCE.licenses_normalized rln on rln.license=rl.type left outer join (
select rl.id, sum(case when lower(rln.normalized) like 'cc-%' then 1 else 0 end) as count
from SOURCE.result_licenses rl
left outer join SOURCE.licenses_normalized rln on rl.type=rln.license
group by rl.id
) rln on rln.id=r.id
group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, c.code, c.name; group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, c.code, c.name;
create table TARGET.result_affiliated_year stored as parquet as create table TARGET.result_affiliated_year stored as parquet as
select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, r.peer_reviewed, case when lower(rln.normalized) like 'cc-%' then true else false end as cc_licence, r.abstract as abstract, r.type, r.year case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, r.peer_reviewed,
rln.count > 0 as cc_licence, r.abstract as abstract, r.type, r.year
from SOURCE.result r from SOURCE.result r
join SOURCE.result_organization ro on ro.id=r.id join SOURCE.result_organization ro on ro.id=r.id
join SOURCE.organization o on o.id=ro.organization join SOURCE.organization o on o.id=ro.organization
join SOURCE.country c on c.code=o.country and c.continent_name='Europe' join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
left outer join SOURCE.result_licenses rl on rl.id=r.id left outer join SOURCE.result_licenses rl on rl.id=r.id
left outer join SOURCE.result_pids pids on pids.id=r.id left outer join SOURCE.result_pids pids on pids.id=r.id
left outer join SOURCE.licenses_normalized rln on rln.license=rl.type left outer join (
select rl.id, sum(case when lower(rln.normalized) like 'cc-%' then 1 else 0 end) as count
from SOURCE.result_licenses rl
left outer join SOURCE.licenses_normalized rln on rl.type=rln.license
group by rl.id
) rln on rln.id=r.id
group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, r.year; group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, r.year;
create table TARGET.result_affiliated_year_country stored as parquet as create table TARGET.result_affiliated_year_country stored as parquet as
select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
r.peer_reviewed, case when lower(rln.normalized) like 'cc-%' then true else false end as cc_licence, r.abstract as abstract, r.type, r.year, c.code as ccode, c.name as cname r.peer_reviewed, rln.count > 0 as cc_licence, r.abstract as abstract, r.type, r.year, c.code as ccode, c.name as cname
from SOURCE.result r from SOURCE.result r
join SOURCE.result_organization ro on ro.id=r.id join SOURCE.result_organization ro on ro.id=r.id
join SOURCE.organization o on o.id=ro.organization join SOURCE.organization o on o.id=ro.organization
join SOURCE.country c on c.code=o.country and c.continent_name='Europe' join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
left outer join SOURCE.result_licenses rl on rl.id=r.id left outer join SOURCE.result_licenses rl on rl.id=r.id
left outer join SOURCE.result_pids pids on pids.id=r.id left outer join SOURCE.result_pids pids on pids.id=r.id
left outer join SOURCE.licenses_normalized rln on rln.license=rl.type left outer join (
select rl.id, sum(case when lower(rln.normalized) like 'cc-%' then 1 else 0 end) as count
from SOURCE.result_licenses rl
left outer join SOURCE.licenses_normalized rln on rl.type=rln.license
group by rl.id
) rln on rln.id=r.id
group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, r.year, c.code, c.name; group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, r.year, c.code, c.name;
create table TARGET.result_affiliated_datasource stored as parquet as create table TARGET.result_affiliated_datasource stored as parquet as
select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, r.peer_reviewed, case when lower(rln.normalized) like 'cc-%' then true else false end as cc_licence, r.abstract as abstract, r.type, d.name as dname case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, r.peer_reviewed,
rln.count > 0 as cc_licence, r.abstract as abstract, r.type, d.name as dname
from SOURCE.result r from SOURCE.result r
join SOURCE.result_organization ro on ro.id=r.id join SOURCE.result_organization ro on ro.id=r.id
join SOURCE.organization o on o.id=ro.organization join SOURCE.organization o on o.id=ro.organization
@ -47,13 +64,18 @@ left outer join SOURCE.result_datasources rd on rd.id=r.id
left outer join SOURCE.datasource d on d.id=rd.datasource left outer join SOURCE.datasource d on d.id=rd.datasource
left outer join SOURCE.result_licenses rl on rl.id=r.id left outer join SOURCE.result_licenses rl on rl.id=r.id
left outer join SOURCE.result_pids pids on pids.id=r.id left outer join SOURCE.result_pids pids on pids.id=r.id
left outer join SOURCE.licenses_normalized rln on rln.license=rl.type left outer join (
select rl.id, sum(case when lower(rln.normalized) like 'cc-%' then 1 else 0 end) as count
from SOURCE.result_licenses rl
left outer join SOURCE.licenses_normalized rln on rl.type=rln.license
group by rl.id
) rln on rln.id=r.id
group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, d.name; group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, d.name;
create table TARGET.result_affiliated_datasource_country stored as parquet as create table TARGET.result_affiliated_datasource_country stored as parquet as
select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
r.peer_reviewed, case when lower(rln.normalized) like 'cc-%' then true else false end as cc_licence, r.abstract as abstract, r.type, d.name as dname, c.code as ccode, c.name as cname r.peer_reviewed, rln.count > 0 as cc_licence, r.abstract as abstract, r.type, d.name as dname, c.code as ccode, c.name as cname
from SOURCE.result r from SOURCE.result r
join SOURCE.result_organization ro on ro.id=r.id join SOURCE.result_organization ro on ro.id=r.id
join SOURCE.organization o on o.id=ro.organization join SOURCE.organization o on o.id=ro.organization
@ -62,38 +84,54 @@ left outer join SOURCE.result_datasources rd on rd.id=r.id
left outer join SOURCE.datasource d on d.id=rd.datasource left outer join SOURCE.datasource d on d.id=rd.datasource
left outer join SOURCE.result_licenses rl on rl.id=r.id left outer join SOURCE.result_licenses rl on rl.id=r.id
left outer join SOURCE.result_pids pids on pids.id=r.id left outer join SOURCE.result_pids pids on pids.id=r.id
left outer join SOURCE.licenses_normalized rln on rln.license=rl.type left outer join (
select rl.id, sum(case when lower(rln.normalized) like 'cc-%' then 1 else 0 end) as count
from SOURCE.result_licenses rl
left outer join SOURCE.licenses_normalized rln on rl.type=rln.license
group by rl.id
) rln on rln.id=r.id
group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, d.name, c.code, c.name; group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, d.name, c.code, c.name;
create table TARGET.result_affiliated_organization stored as parquet as create table TARGET.result_affiliated_organization stored as parquet as
select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
r.peer_reviewed, case when lower(rln.normalized) like 'cc-%' then true else false end as cc_licence, r.abstract as abstract, r.type, o.name as oname r.peer_reviewed, rln.count > 0 as cc_licence, r.abstract as abstract, r.type, o.name as oname
from SOURCE.result r from SOURCE.result r
join SOURCE.result_organization ro on ro.id=r.id join SOURCE.result_organization ro on ro.id=r.id
join SOURCE.organization o on o.id=ro.organization join SOURCE.organization o on o.id=ro.organization
join SOURCE.country c on c.code=o.country and c.continent_name='Europe' join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
left outer join SOURCE.result_licenses rl on rl.id=r.id left outer join SOURCE.result_licenses rl on rl.id=r.id
left outer join SOURCE.result_pids pids on pids.id=r.id left outer join SOURCE.result_pids pids on pids.id=r.id
left outer join SOURCE.licenses_normalized rln on rln.license=rl.type left outer join (
select rl.id, sum(case when lower(rln.normalized) like 'cc-%' then 1 else 0 end) as count
from SOURCE.result_licenses rl
left outer join SOURCE.licenses_normalized rln on rl.type=rln.license
group by rl.id
) rln on rln.id=r.id
group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, o.name; group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, o.name;
create table TARGET.result_affiliated_organization_country stored as parquet as create table TARGET.result_affiliated_organization_country stored as parquet as
select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
r.peer_reviewed, case when lower(rln.normalized) like 'cc-%' then true else false end as cc_licence, r.abstract as abstract, r.type, o.name as oname, c.code as ccode, c.name as cname r.peer_reviewed, rln.count > 0 as cc_licence, r.abstract as abstract, r.type, o.name as oname, c.code as ccode, c.name as cname
from SOURCE.result r from SOURCE.result r
join SOURCE.result_organization ro on ro.id=r.id join SOURCE.result_organization ro on ro.id=r.id
join SOURCE.organization o on o.id=ro.organization join SOURCE.organization o on o.id=ro.organization
join SOURCE.country c on c.code=o.country and c.continent_name='Europe' join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
left outer join SOURCE.result_licenses rl on rl.id=r.id left outer join SOURCE.result_licenses rl on rl.id=r.id
left outer join SOURCE.result_pids pids on pids.id=r.id left outer join SOURCE.result_pids pids on pids.id=r.id
left outer join SOURCE.licenses_normalized rln on rln.license=rl.type left outer join (
select rl.id, sum(case when lower(rln.normalized) like 'cc-%' then 1 else 0 end) as count
from SOURCE.result_licenses rl
left outer join SOURCE.licenses_normalized rln on rl.type=rln.license
group by rl.id
) rln on rln.id=r.id
group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, o.name, c.code, c.name; group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, o.name, c.code, c.name;
create table TARGET.result_affiliated_funder stored as parquet as create table TARGET.result_affiliated_funder stored as parquet as
select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, r.peer_reviewed, case when lower(rln.normalized) like 'cc-%' then true else false end as cc_licence, r.abstract as abstract, r.type, p.funder as pfunder case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, r.peer_reviewed,
rln.count > 0 as cc_licence, r.abstract as abstract, r.type, p.funder as pfunder
from SOURCE.result r from SOURCE.result r
join SOURCE.result_organization ro on ro.id=r.id join SOURCE.result_organization ro on ro.id=r.id
join SOURCE.organization o on o.id=ro.organization join SOURCE.organization o on o.id=ro.organization
@ -102,13 +140,18 @@ join SOURCE.result_projects rp on rp.id=r.id
join SOURCE.project p on p.id=rp.project join SOURCE.project p on p.id=rp.project
left outer join SOURCE.result_licenses rl on rl.id=r.id left outer join SOURCE.result_licenses rl on rl.id=r.id
left outer join SOURCE.result_pids pids on pids.id=r.id left outer join SOURCE.result_pids pids on pids.id=r.id
left outer join SOURCE.licenses_normalized rln on rln.license=rl.type left outer join (
select rl.id, sum(case when lower(rln.normalized) like 'cc-%' then 1 else 0 end) as count
from SOURCE.result_licenses rl
left outer join SOURCE.licenses_normalized rln on rl.type=rln.license
group by rl.id
) rln on rln.id=r.id
group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, p.funder; group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, p.funder;
create table TARGET.result_affiliated_funder_country stored as parquet as create table TARGET.result_affiliated_funder_country stored as parquet as
select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
r.peer_reviewed, case when lower(rln.normalized) like 'cc-%' then true else false end as cc_licence, r.abstract as abstract, r.type, p.funder as pfunder, c.code as ccode, c.name as cname r.peer_reviewed, rln.count > 0 as cc_licence, r.abstract as abstract, r.type, p.funder as pfunder, c.code as ccode, c.name as cname
from SOURCE.result r from SOURCE.result r
join SOURCE.result_organization ro on ro.id=r.id join SOURCE.result_organization ro on ro.id=r.id
join SOURCE.organization o on o.id=ro.organization join SOURCE.organization o on o.id=ro.organization
@ -117,13 +160,18 @@ join SOURCE.result_projects rp on rp.id=r.id
join SOURCE.project p on p.id=rp.project join SOURCE.project p on p.id=rp.project
left outer join SOURCE.result_licenses rl on rl.id=r.id left outer join SOURCE.result_licenses rl on rl.id=r.id
left outer join SOURCE.result_pids pids on pids.id=r.id left outer join SOURCE.result_pids pids on pids.id=r.id
left outer join SOURCE.licenses_normalized rln on rln.license=rl.type left outer join (
select rl.id, sum(case when lower(rln.normalized) like 'cc-%' then 1 else 0 end) as count
from SOURCE.result_licenses rl
left outer join SOURCE.licenses_normalized rln on rl.type=rln.license
group by rl.id
) rln on rln.id=r.id
group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, p.funder, c.code, c.name; group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, p.funder, c.code, c.name;
create table TARGET.result_deposited_country stored as parquet as create table TARGET.result_deposited_country stored as parquet as
select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
r.peer_reviewed, case when lower(rln.normalized) like 'cc-%' then true else false end as cc_licence, r.abstract as abstract, r.type, c.code as ccode, c.name as cname r.peer_reviewed, rln.count > 0 as cc_licence, r.abstract as abstract, r.type, c.code as ccode, c.name as cname
from SOURCE.result r from SOURCE.result r
join SOURCE.result_datasources rd on rd.id=r.id join SOURCE.result_datasources rd on rd.id=r.id
join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
@ -132,12 +180,18 @@ join SOURCE.organization o on o.id=dor.organization
join SOURCE.country c on c.code=o.country and c.continent_name='Europe' join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
left outer join SOURCE.result_licenses rl on rl.id=r.id left outer join SOURCE.result_licenses rl on rl.id=r.id
left outer join SOURCE.result_pids pids on pids.id=r.id left outer join SOURCE.result_pids pids on pids.id=r.id
left outer join SOURCE.licenses_normalized rln on rln.license=rl.type left outer join (
select rl.id, sum(case when lower(rln.normalized) like 'cc-%' then 1 else 0 end) as count
from SOURCE.result_licenses rl
left outer join SOURCE.licenses_normalized rln on rl.type=rln.license
group by rl.id
) rln on rln.id=r.id
group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, c.code, c.name; group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, c.code, c.name;
create table TARGET.result_deposited_year stored as parquet as create table TARGET.result_deposited_year stored as parquet as
select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, r.peer_reviewed, case when lower(rln.normalized) like 'cc-%' then true else false end as cc_licence, r.abstract as abstract, r.type, r.year case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, r.peer_reviewed,
rln.count > 0 as cc_licence, r.abstract as abstract, r.type, r.year
from SOURCE.result r from SOURCE.result r
join SOURCE.result_datasources rd on rd.id=r.id join SOURCE.result_datasources rd on rd.id=r.id
join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
@ -146,13 +200,18 @@ join SOURCE.organization o on o.id=dor.organization
join SOURCE.country c on c.code=o.country and c.continent_name='Europe' join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
left outer join SOURCE.result_licenses rl on rl.id=r.id left outer join SOURCE.result_licenses rl on rl.id=r.id
left outer join SOURCE.result_pids pids on pids.id=r.id left outer join SOURCE.result_pids pids on pids.id=r.id
left outer join SOURCE.licenses_normalized rln on rln.license=rl.type left outer join (
select rl.id, sum(case when lower(rln.normalized) like 'cc-%' then 1 else 0 end) as count
from SOURCE.result_licenses rl
left outer join SOURCE.licenses_normalized rln on rl.type=rln.license
group by rl.id
) rln on rln.id=r.id
group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, r.year; group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, r.year;
create table TARGET.result_deposited_year_country stored as parquet as create table TARGET.result_deposited_year_country stored as parquet as
select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
r.peer_reviewed, case when lower(rln.normalized) like 'cc-%' then true else false end as cc_licence, r.abstract as abstract, r.type, r.year, c.code as ccode, c.name as cname r.peer_reviewed, rln.count > 0 as cc_licence, r.abstract as abstract, r.type, r.year, c.code as ccode, c.name as cname
from SOURCE.result r from SOURCE.result r
join SOURCE.result_datasources rd on rd.id=r.id join SOURCE.result_datasources rd on rd.id=r.id
join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
@ -161,13 +220,18 @@ join SOURCE.organization o on o.id=dor.organization
join SOURCE.country c on c.code=o.country and c.continent_name='Europe' join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
left outer join SOURCE.result_licenses rl on rl.id=r.id left outer join SOURCE.result_licenses rl on rl.id=r.id
left outer join SOURCE.result_pids pids on pids.id=r.id left outer join SOURCE.result_pids pids on pids.id=r.id
left outer join SOURCE.licenses_normalized rln on rln.license=rl.type left outer join (
select rl.id, sum(case when lower(rln.normalized) like 'cc-%' then 1 else 0 end) as count
from SOURCE.result_licenses rl
left outer join SOURCE.licenses_normalized rln on rl.type=rln.license
group by rl.id
) rln on rln.id=r.id
group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, r.year, c.code, c.name; group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, r.year, c.code, c.name;
create table TARGET.result_deposited_datasource stored as parquet as create table TARGET.result_deposited_datasource stored as parquet as
select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
r.peer_reviewed, case when lower(rln.normalized) like 'cc-%' then true else false end as cc_licence, r.abstract as abstract, r.type, d.name as dname r.peer_reviewed, rln.count > 0 as cc_licence, r.abstract as abstract, r.type, d.name as dname
from SOURCE.result r from SOURCE.result r
join SOURCE.result_datasources rd on rd.id=r.id join SOURCE.result_datasources rd on rd.id=r.id
join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
@ -176,13 +240,18 @@ join SOURCE.organization o on o.id=dor.organization
join SOURCE.country c on c.code=o.country and c.continent_name='Europe' join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
left outer join SOURCE.result_licenses rl on rl.id=r.id left outer join SOURCE.result_licenses rl on rl.id=r.id
left outer join SOURCE.result_pids pids on pids.id=r.id left outer join SOURCE.result_pids pids on pids.id=r.id
left outer join SOURCE.licenses_normalized rln on rln.license=rl.type left outer join (
select rl.id, sum(case when lower(rln.normalized) like 'cc-%' then 1 else 0 end) as count
from SOURCE.result_licenses rl
left outer join SOURCE.licenses_normalized rln on rl.type=rln.license
group by rl.id
) rln on rln.id=r.id
group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, d.name; group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, d.name;
create table TARGET.result_deposited_datasource_country stored as parquet as create table TARGET.result_deposited_datasource_country stored as parquet as
select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
r.peer_reviewed, case when lower(rln.normalized) like 'cc-%' then true else false end as cc_licence, r.abstract as abstract, r.type, d.name as dname, c.code as ccode, c.name as cname r.peer_reviewed, rln.count > 0 as cc_licence, r.abstract as abstract, r.type, d.name as dname, c.code as ccode, c.name as cname
from SOURCE.result r from SOURCE.result r
join SOURCE.result_datasources rd on rd.id=r.id join SOURCE.result_datasources rd on rd.id=r.id
join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
@ -191,12 +260,18 @@ join SOURCE.organization o on o.id=dor.organization
join SOURCE.country c on c.code=o.country and c.continent_name='Europe' join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
left outer join SOURCE.result_licenses rl on rl.id=r.id left outer join SOURCE.result_licenses rl on rl.id=r.id
left outer join SOURCE.result_pids pids on pids.id=r.id left outer join SOURCE.result_pids pids on pids.id=r.id
left outer join SOURCE.licenses_normalized rln on rln.license=rl.type left outer join (
select rl.id, sum(case when lower(rln.normalized) like 'cc-%' then 1 else 0 end) as count
from SOURCE.result_licenses rl
left outer join SOURCE.licenses_normalized rln on rl.type=rln.license
group by rl.id
) rln on rln.id=r.id
group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, d.name, c.code, c.name; group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, d.name, c.code, c.name;
create table TARGET.result_deposited_organization stored as parquet as create table TARGET.result_deposited_organization stored as parquet as
select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, r.peer_reviewed, case when lower(rln.normalized) like 'cc-%' then true else false end as cc_licence, r.abstract as abstract, r.type, o.name as oname case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, r.peer_reviewed,
rln.count > 0 as cc_licence, r.abstract as abstract, r.type, o.name as oname
from SOURCE.result r from SOURCE.result r
join SOURCE.result_datasources rd on rd.id=r.id join SOURCE.result_datasources rd on rd.id=r.id
join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
@ -205,13 +280,18 @@ join SOURCE.organization o on o.id=dor.organization
join SOURCE.country c on c.code=o.country and c.continent_name='Europe' join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
left outer join SOURCE.result_licenses rl on rl.id=r.id left outer join SOURCE.result_licenses rl on rl.id=r.id
left outer join SOURCE.result_pids pids on pids.id=r.id left outer join SOURCE.result_pids pids on pids.id=r.id
left outer join SOURCE.licenses_normalized rln on rln.license=rl.type left outer join (
select rl.id, sum(case when lower(rln.normalized) like 'cc-%' then 1 else 0 end) as count
from SOURCE.result_licenses rl
left outer join SOURCE.licenses_normalized rln on rl.type=rln.license
group by rl.id
) rln on rln.id=r.id
group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, o.name; group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, o.name;
create table TARGET.result_deposited_organization_country stored as parquet as create table TARGET.result_deposited_organization_country stored as parquet as
select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
r.peer_reviewed, case when lower(rln.normalized) like 'cc-%' then true else false end as cc_licence, r.abstract as abstract, r.type, o.name as oname, c.code as ccode, c.name as cname r.peer_reviewed, rln.count > 0 as cc_licence, r.abstract as abstract, r.type, o.name as oname, c.code as ccode, c.name as cname
from SOURCE.result r from SOURCE.result r
join SOURCE.result_datasources rd on rd.id=r.id join SOURCE.result_datasources rd on rd.id=r.id
join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
@ -220,13 +300,18 @@ join SOURCE.organization o on o.id=dor.organization
join SOURCE.country c on c.code=o.country and c.continent_name='Europe' join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
left outer join SOURCE.result_licenses rl on rl.id=r.id left outer join SOURCE.result_licenses rl on rl.id=r.id
left outer join SOURCE.result_pids pids on pids.id=r.id left outer join SOURCE.result_pids pids on pids.id=r.id
left outer join SOURCE.licenses_normalized rln on rln.license=rl.type left outer join (
select rl.id, sum(case when lower(rln.normalized) like 'cc-%' then 1 else 0 end) as count
from SOURCE.result_licenses rl
left outer join SOURCE.licenses_normalized rln on rl.type=rln.license
group by rl.id
) rln on rln.id=r.id
group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, o.name, c.code, c.name; group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, o.name, c.code, c.name;
create table TARGET.result_deposited_funder stored as parquet as create table TARGET.result_deposited_funder stored as parquet as
select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
r.peer_reviewed, case when lower(rln.normalized) like 'cc-%' then true else false end as cc_licence, r.abstract as abstract, r.type, p.funder as pfunder r.peer_reviewed, rln.count > 0 as cc_licence, r.abstract as abstract, r.type, p.funder as pfunder
from SOURCE.result r from SOURCE.result r
join SOURCE.result_datasources rd on rd.id=r.id join SOURCE.result_datasources rd on rd.id=r.id
join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
@ -237,13 +322,18 @@ join SOURCE.result_projects rp on rp.id=r.id
join SOURCE.project p on p.id=rp.project join SOURCE.project p on p.id=rp.project
left outer join SOURCE.result_licenses rl on rl.id=r.id left outer join SOURCE.result_licenses rl on rl.id=r.id
left outer join SOURCE.result_pids pids on pids.id=r.id left outer join SOURCE.result_pids pids on pids.id=r.id
left outer join SOURCE.licenses_normalized rln on rln.license=rl.type left outer join (
select rl.id, sum(case when lower(rln.normalized) like 'cc-%' then 1 else 0 end) as count
from SOURCE.result_licenses rl
left outer join SOURCE.licenses_normalized rln on rl.type=rln.license
group by rl.id
) rln on rln.id=r.id
group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, p.funder; group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, p.funder;
create table TARGET.result_deposited_funder_country stored as parquet as create table TARGET.result_deposited_funder_country stored as parquet as
select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
r.peer_reviewed, case when lower(rln.normalized) like 'cc-%' then true else false end as cc_licence, r.abstract as abstract, r.type, p.funder as pfunder, c.code as ccode, c.name as cname r.peer_reviewed, rln.count > 0 as cc_licence, r.abstract as abstract, r.type, p.funder as pfunder, c.code as ccode, c.name as cname
from SOURCE.result r from SOURCE.result r
join SOURCE.result_datasources rd on rd.id=r.id join SOURCE.result_datasources rd on rd.id=r.id
join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
@ -254,7 +344,12 @@ join SOURCE.result_projects rp on rp.id=r.id
join SOURCE.project p on p.id=rp.project join SOURCE.project p on p.id=rp.project
left outer join SOURCE.result_licenses rl on rl.id=r.id left outer join SOURCE.result_licenses rl on rl.id=r.id
left outer join SOURCE.result_pids pids on pids.id=r.id left outer join SOURCE.result_pids pids on pids.id=r.id
left outer join SOURCE.licenses_normalized rln on rln.license=rl.type left outer join (
select rl.id, sum(case when lower(rln.normalized) like 'cc-%' then 1 else 0 end) as count
from SOURCE.result_licenses rl
left outer join SOURCE.licenses_normalized rln on rl.type=rln.license
group by rl.id
) rln on rln.id=r.id
group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, p.funder, c.code, c.name; group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, p.funder, c.code, c.name;
compute stats TARGET.result_affiliated_country; compute stats TARGET.result_affiliated_country;