Attempt to make the observatory workflow run in Hive

This commit is contained in:
Antonis Lempesis 2021-09-18 00:35:14 +03:00
parent 2943287d10
commit 8b681dcf1b
2 changed files with 527 additions and 317 deletions

View File

@ -18,7 +18,9 @@ echo "Creating observatory database"
impala-shell -q "drop database if exists ${TARGET} cascade" impala-shell -q "drop database if exists ${TARGET} cascade"
impala-shell -q "create database if not exists ${TARGET}" impala-shell -q "create database if not exists ${TARGET}"
impala-shell -d ${SOURCE} -q "show tables" --delimited | sed "s/\(.*\)/create view ${TARGET}.\1 as select * from ${SOURCE}.\1;/" | impala-shell -f - impala-shell -d ${SOURCE} -q "show tables" --delimited | sed "s/\(.*\)/create view ${TARGET}.\1 as select * from ${SOURCE}.\1;/" | impala-shell -f -
cat step21-createObservatoryDB.sql | sed s/SOURCE/$1/g | sed s/TARGET/$2/g1 | impala-shell -f - cat step21-createObservatoryDB.sql | sed s/SOURCE/$1/g | sed s/TARGET/$2/g1 | hive -f -
impala-shell -q "invalidate metadata;"
impala-shell -d ${TARGET} -q "show tables" --delimited | sed "s/\(.*\)/compute stats ${TARGET}.\1;/" | impala-shell -f -
echo "Impala shell finished" echo "Impala shell finished"
echo "Updating shadow observatory database" echo "Updating shadow observatory database"

View File

@ -1,372 +1,580 @@
-- One row per result with a boolean cc_licence flag. The flag is true when at
-- least one of the result's licences has a normalized name starting with 'cc-'.
-- Results without any licence row get false through the coalesce.
create table TARGET.result_cc_licence stored as parquet as
select
    r.id,
    coalesce(cc.cc_count, 0) > 0 as cc_licence
from SOURCE.result r
    left outer join (
        select
            rl.id,
            sum(case when lower(ln.normalized) like 'cc-%' then 1 else 0 end) as cc_count
        from SOURCE.result_licenses rl
            left outer join SOURCE.licenses_normalized ln on rl.type=ln.license
        group by rl.id
    ) cc on cc.id=r.id;
-- Distinct-result counts for results linked, through result_organization, to
-- organizations in countries whose continent_name is 'Europe', broken down by
-- access/licence/PID flags, collaboration flags, result type and country.
-- result_cc_licence is read from TARGET because this script creates it there.
-- Group-by columns are written as full expressions (no select aliases).
-- NOTE(review): the multiple_* flags are NULL when the count tables have no row
-- for a result - confirm this is intended.
create table TARGET.result_affiliated_country stored as parquet as
select
    count(distinct r.id) as total,
    r.green,
    r.gold,
    case when rl.type is not null then true else false end as licence,
    case when pids.pid is not null then true else false end as pid,
    case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
    r.peer_reviewed,
    rln.cc_licence,
    r.abstract as abstract,
    r.authors > 1 as multiple_authors,
    rpc.count > 1 as multiple_projects,
    rfc.count > 1 as multiple_funders,
    r.type,
    c.code as ccode,
    c.name as cname
from SOURCE.result r
    join SOURCE.result_organization ro on ro.id=r.id
    join SOURCE.organization o on o.id=ro.organization
    join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
    left outer join SOURCE.result_licenses rl on rl.id=r.id
    left outer join SOURCE.result_pids pids on pids.id=r.id
    left outer join TARGET.result_cc_licence rln on rln.id=r.id
    left outer join SOURCE.result_projectcount rpc on rpc.id=r.id
    left outer join SOURCE.result_fundercount rfc on rfc.id=r.id
group by
    r.green,
    r.gold,
    case when rl.type is not null then true else false end,
    case when pids.pid is not null then true else false end,
    case when r.access_mode in ('Open Access', 'Open Source') then true else false end,
    r.peer_reviewed,
    r.type,
    r.abstract,
    rln.cc_licence,
    r.authors > 1,
    rpc.count > 1,
    rfc.count > 1,
    c.code,
    c.name;
-- Distinct-result counts for results linked, through result_organization, to
-- organizations in countries whose continent_name is 'Europe', broken down by
-- access/licence/PID flags, collaboration flags, result type and year.
-- result_cc_licence is read from TARGET because this script creates it there.
-- Group-by columns are written as full expressions (no select aliases).
create table TARGET.result_affiliated_year stored as parquet as
select
    count(distinct r.id) as total,
    r.green,
    r.gold,
    case when rl.type is not null then true else false end as licence,
    case when pids.pid is not null then true else false end as pid,
    case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
    r.peer_reviewed,
    rln.cc_licence,
    r.abstract as abstract,
    r.authors > 1 as multiple_authors,
    rpc.count > 1 as multiple_projects,
    rfc.count > 1 as multiple_funders,
    r.type,
    r.year
from SOURCE.result r
    join SOURCE.result_organization ro on ro.id=r.id
    join SOURCE.organization o on o.id=ro.organization
    join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
    left outer join SOURCE.result_licenses rl on rl.id=r.id
    left outer join SOURCE.result_pids pids on pids.id=r.id
    left outer join TARGET.result_cc_licence rln on rln.id=r.id
    left outer join SOURCE.result_projectcount rpc on rpc.id=r.id
    left outer join SOURCE.result_fundercount rfc on rfc.id=r.id
group by
    r.green,
    r.gold,
    case when rl.type is not null then true else false end,
    case when pids.pid is not null then true else false end,
    case when r.access_mode in ('Open Access', 'Open Source') then true else false end,
    r.peer_reviewed,
    r.type,
    r.abstract,
    rln.cc_licence,
    r.authors > 1,
    rpc.count > 1,
    rfc.count > 1,
    r.year;
-- Distinct-result counts for results linked, through result_organization, to
-- organizations in countries whose continent_name is 'Europe', broken down by
-- access/licence/PID flags, collaboration flags, result type, year and country.
-- result_cc_licence is read from TARGET because this script creates it there.
-- Group-by columns are written as full expressions (no select aliases).
create table TARGET.result_affiliated_year_country stored as parquet as
select
    count(distinct r.id) as total,
    r.green,
    r.gold,
    case when rl.type is not null then true else false end as licence,
    case when pids.pid is not null then true else false end as pid,
    case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
    r.peer_reviewed,
    rln.cc_licence,
    r.abstract as abstract,
    r.authors > 1 as multiple_authors,
    rpc.count > 1 as multiple_projects,
    rfc.count > 1 as multiple_funders,
    r.type,
    r.year,
    c.code as ccode,
    c.name as cname
from SOURCE.result r
    join SOURCE.result_organization ro on ro.id=r.id
    join SOURCE.organization o on o.id=ro.organization
    join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
    left outer join SOURCE.result_licenses rl on rl.id=r.id
    left outer join SOURCE.result_pids pids on pids.id=r.id
    left outer join TARGET.result_cc_licence rln on rln.id=r.id
    left outer join SOURCE.result_projectcount rpc on rpc.id=r.id
    left outer join SOURCE.result_fundercount rfc on rfc.id=r.id
group by
    r.green,
    r.gold,
    case when rl.type is not null then true else false end,
    case when pids.pid is not null then true else false end,
    case when r.access_mode in ('Open Access', 'Open Source') then true else false end,
    r.peer_reviewed,
    r.type,
    r.abstract,
    rln.cc_licence,
    r.authors > 1,
    rpc.count > 1,
    rfc.count > 1,
    r.year,
    c.code,
    c.name;
-- Distinct-result counts for results linked, through result_organization, to
-- organizations in countries whose continent_name is 'Europe', broken down by
-- access/licence/PID flags, collaboration flags, result type and datasource name.
-- The datasource is left-joined, so dname is NULL for results without one.
-- result_cc_licence is read from TARGET because this script creates it there.
-- Group-by columns are written as full expressions (no select aliases).
create table TARGET.result_affiliated_datasource stored as parquet as
select
    count(distinct r.id) as total,
    r.green,
    r.gold,
    case when rl.type is not null then true else false end as licence,
    case when pids.pid is not null then true else false end as pid,
    case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
    r.peer_reviewed,
    rln.cc_licence,
    r.abstract as abstract,
    r.authors > 1 as multiple_authors,
    rpc.count > 1 as multiple_projects,
    rfc.count > 1 as multiple_funders,
    r.type,
    d.name as dname
from SOURCE.result r
    join SOURCE.result_organization ro on ro.id=r.id
    join SOURCE.organization o on o.id=ro.organization
    join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
    left outer join SOURCE.result_datasources rd on rd.id=r.id
    left outer join SOURCE.datasource d on d.id=rd.datasource
    left outer join SOURCE.result_licenses rl on rl.id=r.id
    left outer join SOURCE.result_pids pids on pids.id=r.id
    left outer join TARGET.result_cc_licence rln on rln.id=r.id
    left outer join SOURCE.result_projectcount rpc on rpc.id=r.id
    left outer join SOURCE.result_fundercount rfc on rfc.id=r.id
group by
    r.green,
    r.gold,
    case when rl.type is not null then true else false end,
    case when pids.pid is not null then true else false end,
    case when r.access_mode in ('Open Access', 'Open Source') then true else false end,
    r.peer_reviewed,
    r.type,
    r.abstract,
    rln.cc_licence,
    r.authors > 1,
    rpc.count > 1,
    rfc.count > 1,
    d.name;
-- Distinct-result counts for results linked, through result_organization, to
-- organizations in countries whose continent_name is 'Europe', broken down by
-- access/licence/PID flags, collaboration flags, result type, datasource name
-- and country. The datasource is left-joined, so dname may be NULL.
-- result_cc_licence is read from TARGET because this script creates it there.
-- Group-by columns are written as full expressions (no select aliases).
create table TARGET.result_affiliated_datasource_country stored as parquet as
select
    count(distinct r.id) as total,
    r.green,
    r.gold,
    case when rl.type is not null then true else false end as licence,
    case when pids.pid is not null then true else false end as pid,
    case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
    r.peer_reviewed,
    rln.cc_licence,
    r.abstract as abstract,
    r.authors > 1 as multiple_authors,
    rpc.count > 1 as multiple_projects,
    rfc.count > 1 as multiple_funders,
    r.type,
    d.name as dname,
    c.code as ccode,
    c.name as cname
from SOURCE.result r
    join SOURCE.result_organization ro on ro.id=r.id
    join SOURCE.organization o on o.id=ro.organization
    join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
    left outer join SOURCE.result_datasources rd on rd.id=r.id
    left outer join SOURCE.datasource d on d.id=rd.datasource
    left outer join SOURCE.result_licenses rl on rl.id=r.id
    left outer join SOURCE.result_pids pids on pids.id=r.id
    left outer join TARGET.result_cc_licence rln on rln.id=r.id
    left outer join SOURCE.result_projectcount rpc on rpc.id=r.id
    left outer join SOURCE.result_fundercount rfc on rfc.id=r.id
group by
    r.green,
    r.gold,
    case when rl.type is not null then true else false end,
    case when pids.pid is not null then true else false end,
    case when r.access_mode in ('Open Access', 'Open Source') then true else false end,
    r.peer_reviewed,
    r.type,
    r.abstract,
    rln.cc_licence,
    r.authors > 1,
    rpc.count > 1,
    rfc.count > 1,
    d.name,
    c.code,
    c.name;
-- Distinct-result counts for results linked, through result_organization, to
-- organizations in countries whose continent_name is 'Europe', broken down by
-- access/licence/PID flags, collaboration flags, result type and organization name.
-- result_cc_licence is read from TARGET because this script creates it there.
-- Group-by columns are written as full expressions (no select aliases).
create table TARGET.result_affiliated_organization stored as parquet as
select
    count(distinct r.id) as total,
    r.green,
    r.gold,
    case when rl.type is not null then true else false end as licence,
    case when pids.pid is not null then true else false end as pid,
    case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
    r.peer_reviewed,
    rln.cc_licence,
    r.abstract as abstract,
    r.authors > 1 as multiple_authors,
    rpc.count > 1 as multiple_projects,
    rfc.count > 1 as multiple_funders,
    r.type,
    o.name as oname
from SOURCE.result r
    join SOURCE.result_organization ro on ro.id=r.id
    join SOURCE.organization o on o.id=ro.organization
    join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
    left outer join SOURCE.result_licenses rl on rl.id=r.id
    left outer join SOURCE.result_pids pids on pids.id=r.id
    left outer join TARGET.result_cc_licence rln on rln.id=r.id
    left outer join SOURCE.result_projectcount rpc on rpc.id=r.id
    left outer join SOURCE.result_fundercount rfc on rfc.id=r.id
group by
    r.green,
    r.gold,
    case when rl.type is not null then true else false end,
    case when pids.pid is not null then true else false end,
    case when r.access_mode in ('Open Access', 'Open Source') then true else false end,
    r.peer_reviewed,
    r.type,
    r.abstract,
    rln.cc_licence,
    r.authors > 1,
    rpc.count > 1,
    rfc.count > 1,
    o.name;
-- Distinct-result counts for results linked, through result_organization, to
-- organizations in countries whose continent_name is 'Europe', broken down by
-- access/licence/PID flags, collaboration flags, result type, organization name
-- and country.
-- result_cc_licence is read from TARGET because this script creates it there.
-- Group-by columns are written as full expressions (no select aliases).
create table TARGET.result_affiliated_organization_country stored as parquet as
select
    count(distinct r.id) as total,
    r.green,
    r.gold,
    case when rl.type is not null then true else false end as licence,
    case when pids.pid is not null then true else false end as pid,
    case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
    r.peer_reviewed,
    rln.cc_licence,
    r.abstract as abstract,
    r.authors > 1 as multiple_authors,
    rpc.count > 1 as multiple_projects,
    rfc.count > 1 as multiple_funders,
    r.type,
    o.name as oname,
    c.code as ccode,
    c.name as cname
from SOURCE.result r
    join SOURCE.result_organization ro on ro.id=r.id
    join SOURCE.organization o on o.id=ro.organization
    join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
    left outer join SOURCE.result_licenses rl on rl.id=r.id
    left outer join SOURCE.result_pids pids on pids.id=r.id
    left outer join TARGET.result_cc_licence rln on rln.id=r.id
    left outer join SOURCE.result_projectcount rpc on rpc.id=r.id
    left outer join SOURCE.result_fundercount rfc on rfc.id=r.id
group by
    r.green,
    r.gold,
    case when rl.type is not null then true else false end,
    case when pids.pid is not null then true else false end,
    case when r.access_mode in ('Open Access', 'Open Source') then true else false end,
    r.peer_reviewed,
    r.type,
    r.abstract,
    rln.cc_licence,
    r.authors > 1,
    rpc.count > 1,
    rfc.count > 1,
    o.name,
    c.code,
    c.name;
-- Distinct-result counts for results linked, through result_organization, to
-- organizations in countries whose continent_name is 'Europe' and, through
-- result_projects, to at least one project (inner joins), broken down by
-- access/licence/PID flags, collaboration flags, result type and project funder.
-- result_cc_licence is read from TARGET because this script creates it there.
-- Group-by columns are written as full expressions (no select aliases).
create table TARGET.result_affiliated_funder stored as parquet as
select
    count(distinct r.id) as total,
    r.green,
    r.gold,
    case when rl.type is not null then true else false end as licence,
    case when pids.pid is not null then true else false end as pid,
    case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
    r.peer_reviewed,
    rln.cc_licence,
    r.abstract as abstract,
    r.authors > 1 as multiple_authors,
    rpc.count > 1 as multiple_projects,
    rfc.count > 1 as multiple_funders,
    r.type,
    p.funder as pfunder
from SOURCE.result r
    join SOURCE.result_organization ro on ro.id=r.id
    join SOURCE.organization o on o.id=ro.organization
    join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
    join SOURCE.result_projects rp on rp.id=r.id
    join SOURCE.project p on p.id=rp.project
    left outer join SOURCE.result_licenses rl on rl.id=r.id
    left outer join SOURCE.result_pids pids on pids.id=r.id
    left outer join TARGET.result_cc_licence rln on rln.id=r.id
    left outer join SOURCE.result_projectcount rpc on rpc.id=r.id
    left outer join SOURCE.result_fundercount rfc on rfc.id=r.id
group by
    r.green,
    r.gold,
    case when rl.type is not null then true else false end,
    case when pids.pid is not null then true else false end,
    case when r.access_mode in ('Open Access', 'Open Source') then true else false end,
    r.peer_reviewed,
    r.type,
    r.abstract,
    rln.cc_licence,
    r.authors > 1,
    rpc.count > 1,
    rfc.count > 1,
    p.funder;
-- Distinct-result counts for results linked, through result_organization, to
-- organizations in countries whose continent_name is 'Europe' and, through
-- result_projects, to at least one project (inner joins), broken down by
-- access/licence/PID flags, collaboration flags, result type, project funder
-- and country.
-- result_cc_licence is read from TARGET because this script creates it there.
-- Group-by columns are written as full expressions (no select aliases).
create table TARGET.result_affiliated_funder_country stored as parquet as
select
    count(distinct r.id) as total,
    r.green,
    r.gold,
    case when rl.type is not null then true else false end as licence,
    case when pids.pid is not null then true else false end as pid,
    case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
    r.peer_reviewed,
    rln.cc_licence,
    r.abstract as abstract,
    r.authors > 1 as multiple_authors,
    rpc.count > 1 as multiple_projects,
    rfc.count > 1 as multiple_funders,
    r.type,
    p.funder as pfunder,
    c.code as ccode,
    c.name as cname
from SOURCE.result r
    join SOURCE.result_organization ro on ro.id=r.id
    join SOURCE.organization o on o.id=ro.organization
    join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
    join SOURCE.result_projects rp on rp.id=r.id
    join SOURCE.project p on p.id=rp.project
    left outer join SOURCE.result_licenses rl on rl.id=r.id
    left outer join SOURCE.result_pids pids on pids.id=r.id
    left outer join TARGET.result_cc_licence rln on rln.id=r.id
    left outer join SOURCE.result_projectcount rpc on rpc.id=r.id
    left outer join SOURCE.result_fundercount rfc on rfc.id=r.id
group by
    r.green,
    r.gold,
    case when rl.type is not null then true else false end,
    case when pids.pid is not null then true else false end,
    case when r.access_mode in ('Open Access', 'Open Source') then true else false end,
    r.peer_reviewed,
    r.type,
    r.abstract,
    rln.cc_licence,
    r.authors > 1,
    rpc.count > 1,
    rfc.count > 1,
    p.funder,
    c.code,
    c.name;
-- Distinct-result counts for results found in repository-type datasources
-- (d.type in the four listed repository types) whose datasource is linked,
-- through datasource_organizations, to an organization in a country whose
-- continent_name is 'Europe', broken down by access/licence/PID flags,
-- collaboration flags, result type and country.
-- result_cc_licence is read from TARGET because this script creates it there.
-- Group-by columns are written as full expressions (no select aliases).
create table TARGET.result_deposited_country stored as parquet as
select
    count(distinct r.id) as total,
    r.green,
    r.gold,
    case when rl.type is not null then true else false end as licence,
    case when pids.pid is not null then true else false end as pid,
    case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
    r.peer_reviewed,
    rln.cc_licence,
    r.abstract as abstract,
    r.authors > 1 as multiple_authors,
    rpc.count > 1 as multiple_projects,
    rfc.count > 1 as multiple_funders,
    r.type,
    c.code as ccode,
    c.name as cname
from SOURCE.result r
    join SOURCE.result_datasources rd on rd.id=r.id
    join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
    join SOURCE.datasource_organizations dor on dor.id=d.id
    join SOURCE.organization o on o.id=dor.organization
    join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
    left outer join SOURCE.result_licenses rl on rl.id=r.id
    left outer join SOURCE.result_pids pids on pids.id=r.id
    left outer join TARGET.result_cc_licence rln on rln.id=r.id
    left outer join SOURCE.result_projectcount rpc on rpc.id=r.id
    left outer join SOURCE.result_fundercount rfc on rfc.id=r.id
group by
    r.green,
    r.gold,
    case when rl.type is not null then true else false end,
    case when pids.pid is not null then true else false end,
    case when r.access_mode in ('Open Access', 'Open Source') then true else false end,
    r.peer_reviewed,
    r.type,
    r.abstract,
    rln.cc_licence,
    r.authors > 1,
    rpc.count > 1,
    rfc.count > 1,
    c.code,
    c.name;
-- Distinct-result counts for results found in repository-type datasources
-- (d.type in the four listed repository types) whose datasource is linked,
-- through datasource_organizations, to an organization in a country whose
-- continent_name is 'Europe', broken down by access/licence/PID flags,
-- collaboration flags, result type and year.
-- result_cc_licence is read from TARGET because this script creates it there.
-- Group-by columns are written as full expressions (no select aliases).
create table TARGET.result_deposited_year stored as parquet as
select
    count(distinct r.id) as total,
    r.green,
    r.gold,
    case when rl.type is not null then true else false end as licence,
    case when pids.pid is not null then true else false end as pid,
    case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
    r.peer_reviewed,
    rln.cc_licence,
    r.abstract as abstract,
    r.authors > 1 as multiple_authors,
    rpc.count > 1 as multiple_projects,
    rfc.count > 1 as multiple_funders,
    r.type,
    r.year
from SOURCE.result r
    join SOURCE.result_datasources rd on rd.id=r.id
    join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
    join SOURCE.datasource_organizations dor on dor.id=d.id
    join SOURCE.organization o on o.id=dor.organization
    join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
    left outer join SOURCE.result_licenses rl on rl.id=r.id
    left outer join SOURCE.result_pids pids on pids.id=r.id
    left outer join TARGET.result_cc_licence rln on rln.id=r.id
    left outer join SOURCE.result_projectcount rpc on rpc.id=r.id
    left outer join SOURCE.result_fundercount rfc on rfc.id=r.id
group by
    r.green,
    r.gold,
    case when rl.type is not null then true else false end,
    case when pids.pid is not null then true else false end,
    case when r.access_mode in ('Open Access', 'Open Source') then true else false end,
    r.peer_reviewed,
    r.type,
    r.abstract,
    rln.cc_licence,
    r.authors > 1,
    rpc.count > 1,
    rfc.count > 1,
    r.year;
-- Distinct-result counts for results found in repository-type datasources
-- (d.type in the four listed repository types) whose datasource is linked,
-- through datasource_organizations, to an organization in a country whose
-- continent_name is 'Europe', broken down by access/licence/PID flags,
-- collaboration flags, result type, year and country.
-- result_cc_licence is read from TARGET because this script creates it there.
-- Group-by columns are written as full expressions (no select aliases).
create table TARGET.result_deposited_year_country stored as parquet as
select
    count(distinct r.id) as total,
    r.green,
    r.gold,
    case when rl.type is not null then true else false end as licence,
    case when pids.pid is not null then true else false end as pid,
    case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
    r.peer_reviewed,
    rln.cc_licence,
    r.abstract as abstract,
    r.authors > 1 as multiple_authors,
    rpc.count > 1 as multiple_projects,
    rfc.count > 1 as multiple_funders,
    r.type,
    r.year,
    c.code as ccode,
    c.name as cname
from SOURCE.result r
    join SOURCE.result_datasources rd on rd.id=r.id
    join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
    join SOURCE.datasource_organizations dor on dor.id=d.id
    join SOURCE.organization o on o.id=dor.organization
    join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
    left outer join SOURCE.result_licenses rl on rl.id=r.id
    left outer join SOURCE.result_pids pids on pids.id=r.id
    left outer join TARGET.result_cc_licence rln on rln.id=r.id
    left outer join SOURCE.result_projectcount rpc on rpc.id=r.id
    left outer join SOURCE.result_fundercount rfc on rfc.id=r.id
group by
    r.green,
    r.gold,
    case when rl.type is not null then true else false end,
    case when pids.pid is not null then true else false end,
    case when r.access_mode in ('Open Access', 'Open Source') then true else false end,
    r.peer_reviewed,
    r.type,
    r.abstract,
    rln.cc_licence,
    r.authors > 1,
    rpc.count > 1,
    rfc.count > 1,
    r.year,
    c.code,
    c.name;
-- result_deposited_datasource
-- Counts distinct results (total) linked through result_datasources to a
-- datasource of one of the listed repository types, where the organization
-- behind that datasource is in a country with continent_name Europe.
-- One output row per combination of the indicator flags, r.type and
-- datasource name (dname).
-- The licence and pid joins can multiply rows per result, which is why the
-- total is a count(distinct r.id).
-- result_cc_licence is created earlier in this script inside TARGET, so it is
-- read from TARGET here and not from SOURCE.
-- Group by repeats the full expressions instead of the select aliases.
create table TARGET.result_deposited_datasource stored as parquet as
select
    count(distinct r.id) as total,
    r.green,
    r.gold,
    case when rl.type is not null then true else false end as licence,
    case when pids.pid is not null then true else false end as pid,
    case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
    r.peer_reviewed,
    rln.cc_licence,
    r.abstract as abstract,
    r.authors > 1 as multiple_authors,
    rpc.count > 1 as multiple_projects,
    rfc.count > 1 as multiple_funders,
    r.type,
    d.name as dname
from SOURCE.result r
    join SOURCE.result_datasources rd on rd.id=r.id
    join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
    join SOURCE.datasource_organizations dor on dor.id=d.id
    join SOURCE.organization o on o.id=dor.organization
    join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
    left outer join SOURCE.result_licenses rl on rl.id=r.id
    left outer join SOURCE.result_pids pids on pids.id=r.id
    left outer join TARGET.result_cc_licence rln on rln.id=r.id
    left outer join SOURCE.result_projectcount rpc on rpc.id=r.id
    left outer join SOURCE.result_fundercount rfc on rfc.id=r.id
group by
    r.green,
    r.gold,
    case when rl.type is not null then true else false end,
    case when pids.pid is not null then true else false end,
    case when r.access_mode in ('Open Access', 'Open Source') then true else false end,
    r.peer_reviewed,
    r.type,
    r.abstract,
    rln.cc_licence,
    r.authors > 1,
    rpc.count > 1,
    rfc.count > 1,
    d.name;
-- result_deposited_datasource_country
-- Counts distinct results (total) linked through result_datasources to a
-- datasource of one of the listed repository types, where the organization
-- behind that datasource is in a country with continent_name Europe.
-- One output row per combination of the indicator flags, r.type,
-- datasource name (dname) and country (ccode, cname).
-- The licence and pid joins can multiply rows per result, which is why the
-- total is a count(distinct r.id).
-- result_cc_licence is created earlier in this script inside TARGET, so it is
-- read from TARGET here and not from SOURCE.
-- Group by repeats the full expressions instead of the select aliases.
create table TARGET.result_deposited_datasource_country stored as parquet as
select
    count(distinct r.id) as total,
    r.green,
    r.gold,
    case when rl.type is not null then true else false end as licence,
    case when pids.pid is not null then true else false end as pid,
    case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
    r.peer_reviewed,
    rln.cc_licence,
    r.abstract as abstract,
    r.authors > 1 as multiple_authors,
    rpc.count > 1 as multiple_projects,
    rfc.count > 1 as multiple_funders,
    r.type,
    d.name as dname,
    c.code as ccode,
    c.name as cname
from SOURCE.result r
    join SOURCE.result_datasources rd on rd.id=r.id
    join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
    join SOURCE.datasource_organizations dor on dor.id=d.id
    join SOURCE.organization o on o.id=dor.organization
    join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
    left outer join SOURCE.result_licenses rl on rl.id=r.id
    left outer join SOURCE.result_pids pids on pids.id=r.id
    left outer join TARGET.result_cc_licence rln on rln.id=r.id
    left outer join SOURCE.result_projectcount rpc on rpc.id=r.id
    left outer join SOURCE.result_fundercount rfc on rfc.id=r.id
group by
    r.green,
    r.gold,
    case when rl.type is not null then true else false end,
    case when pids.pid is not null then true else false end,
    case when r.access_mode in ('Open Access', 'Open Source') then true else false end,
    r.peer_reviewed,
    r.type,
    r.abstract,
    rln.cc_licence,
    r.authors > 1,
    rpc.count > 1,
    rfc.count > 1,
    d.name,
    c.code,
    c.name;
-- result_deposited_organization
-- Counts distinct results (total) linked through result_datasources to a
-- datasource of one of the listed repository types, where the organization
-- behind that datasource is in a country with continent_name Europe.
-- One output row per combination of the indicator flags, r.type and
-- organization name (oname).
-- The licence and pid joins can multiply rows per result, which is why the
-- total is a count(distinct r.id).
-- result_cc_licence is created earlier in this script inside TARGET, so it is
-- read from TARGET here and not from SOURCE.
-- Group by repeats the full expressions instead of the select aliases.
create table TARGET.result_deposited_organization stored as parquet as
select
    count(distinct r.id) as total,
    r.green,
    r.gold,
    case when rl.type is not null then true else false end as licence,
    case when pids.pid is not null then true else false end as pid,
    case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
    r.peer_reviewed,
    rln.cc_licence,
    r.abstract as abstract,
    r.authors > 1 as multiple_authors,
    rpc.count > 1 as multiple_projects,
    rfc.count > 1 as multiple_funders,
    r.type,
    o.name as oname
from SOURCE.result r
    join SOURCE.result_datasources rd on rd.id=r.id
    join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
    join SOURCE.datasource_organizations dor on dor.id=d.id
    join SOURCE.organization o on o.id=dor.organization
    join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
    left outer join SOURCE.result_licenses rl on rl.id=r.id
    left outer join SOURCE.result_pids pids on pids.id=r.id
    left outer join TARGET.result_cc_licence rln on rln.id=r.id
    left outer join SOURCE.result_projectcount rpc on rpc.id=r.id
    left outer join SOURCE.result_fundercount rfc on rfc.id=r.id
group by
    r.green,
    r.gold,
    case when rl.type is not null then true else false end,
    case when pids.pid is not null then true else false end,
    case when r.access_mode in ('Open Access', 'Open Source') then true else false end,
    r.peer_reviewed,
    r.type,
    r.abstract,
    rln.cc_licence,
    r.authors > 1,
    rpc.count > 1,
    rfc.count > 1,
    o.name;
-- result_deposited_organization_country
-- Counts distinct results (total) linked through result_datasources to a
-- datasource of one of the listed repository types, where the organization
-- behind that datasource is in a country with continent_name Europe.
-- One output row per combination of the indicator flags, r.type,
-- organization name (oname) and country (ccode, cname).
-- The licence and pid joins can multiply rows per result, which is why the
-- total is a count(distinct r.id).
-- result_cc_licence is created earlier in this script inside TARGET, so it is
-- read from TARGET here and not from SOURCE.
-- Group by repeats the full expressions instead of the select aliases.
create table TARGET.result_deposited_organization_country stored as parquet as
select
    count(distinct r.id) as total,
    r.green,
    r.gold,
    case when rl.type is not null then true else false end as licence,
    case when pids.pid is not null then true else false end as pid,
    case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
    r.peer_reviewed,
    rln.cc_licence,
    r.abstract as abstract,
    r.authors > 1 as multiple_authors,
    rpc.count > 1 as multiple_projects,
    rfc.count > 1 as multiple_funders,
    r.type,
    o.name as oname,
    c.code as ccode,
    c.name as cname
from SOURCE.result r
    join SOURCE.result_datasources rd on rd.id=r.id
    join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
    join SOURCE.datasource_organizations dor on dor.id=d.id
    join SOURCE.organization o on o.id=dor.organization
    join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
    left outer join SOURCE.result_licenses rl on rl.id=r.id
    left outer join SOURCE.result_pids pids on pids.id=r.id
    left outer join TARGET.result_cc_licence rln on rln.id=r.id
    left outer join SOURCE.result_projectcount rpc on rpc.id=r.id
    left outer join SOURCE.result_fundercount rfc on rfc.id=r.id
group by
    r.green,
    r.gold,
    case when rl.type is not null then true else false end,
    case when pids.pid is not null then true else false end,
    case when r.access_mode in ('Open Access', 'Open Source') then true else false end,
    r.peer_reviewed,
    r.type,
    r.abstract,
    rln.cc_licence,
    r.authors > 1,
    rpc.count > 1,
    rfc.count > 1,
    o.name,
    c.code,
    c.name;
-- result_deposited_funder
-- Counts distinct results (total) linked through result_datasources to a
-- datasource of one of the listed repository types, where the organization
-- behind that datasource is in a country with continent_name Europe, and
-- that are linked to at least one project through result_projects.
-- One output row per combination of the indicator flags, r.type and
-- project funder (pfunder).
-- The licence, pid and project joins can multiply rows per result, which is
-- why the total is a count(distinct r.id).
-- result_cc_licence is created earlier in this script inside TARGET, so it is
-- read from TARGET here and not from SOURCE.
-- Group by repeats the full expressions instead of the select aliases.
create table TARGET.result_deposited_funder stored as parquet as
select
    count(distinct r.id) as total,
    r.green,
    r.gold,
    case when rl.type is not null then true else false end as licence,
    case when pids.pid is not null then true else false end as pid,
    case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
    r.peer_reviewed,
    rln.cc_licence,
    r.abstract as abstract,
    r.authors > 1 as multiple_authors,
    rpc.count > 1 as multiple_projects,
    rfc.count > 1 as multiple_funders,
    r.type,
    p.funder as pfunder
from SOURCE.result r
    join SOURCE.result_datasources rd on rd.id=r.id
    join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
    join SOURCE.datasource_organizations dor on dor.id=d.id
    join SOURCE.organization o on o.id=dor.organization
    join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
    join SOURCE.result_projects rp on rp.id=r.id
    join SOURCE.project p on p.id=rp.project
    left outer join SOURCE.result_licenses rl on rl.id=r.id
    left outer join SOURCE.result_pids pids on pids.id=r.id
    left outer join TARGET.result_cc_licence rln on rln.id=r.id
    left outer join SOURCE.result_projectcount rpc on rpc.id=r.id
    left outer join SOURCE.result_fundercount rfc on rfc.id=r.id
group by
    r.green,
    r.gold,
    case when rl.type is not null then true else false end,
    case when pids.pid is not null then true else false end,
    case when r.access_mode in ('Open Access', 'Open Source') then true else false end,
    r.peer_reviewed,
    r.type,
    r.abstract,
    rln.cc_licence,
    r.authors > 1,
    rpc.count > 1,
    rfc.count > 1,
    p.funder;
-- result_deposited_funder_country
-- Counts distinct results (total) linked through result_datasources to a
-- datasource of one of the listed repository types, where the organization
-- behind that datasource is in a country with continent_name Europe, and
-- that are linked to at least one project through result_projects.
-- One output row per combination of the indicator flags, r.type,
-- project funder (pfunder) and country (ccode, cname).
-- The licence, pid and project joins can multiply rows per result, which is
-- why the total is a count(distinct r.id).
-- result_cc_licence is created earlier in this script inside TARGET, so it is
-- read from TARGET here and not from SOURCE.
-- Group by repeats the full expressions instead of the select aliases.
create table TARGET.result_deposited_funder_country stored as parquet as
select
    count(distinct r.id) as total,
    r.green,
    r.gold,
    case when rl.type is not null then true else false end as licence,
    case when pids.pid is not null then true else false end as pid,
    case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
    r.peer_reviewed,
    rln.cc_licence,
    r.abstract as abstract,
    r.authors > 1 as multiple_authors,
    rpc.count > 1 as multiple_projects,
    rfc.count > 1 as multiple_funders,
    r.type,
    p.funder as pfunder,
    c.code as ccode,
    c.name as cname
from SOURCE.result r
    join SOURCE.result_datasources rd on rd.id=r.id
    join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
    join SOURCE.datasource_organizations dor on dor.id=d.id
    join SOURCE.organization o on o.id=dor.organization
    join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
    join SOURCE.result_projects rp on rp.id=r.id
    join SOURCE.project p on p.id=rp.project
    left outer join SOURCE.result_licenses rl on rl.id=r.id
    left outer join SOURCE.result_pids pids on pids.id=r.id
    left outer join TARGET.result_cc_licence rln on rln.id=r.id
    left outer join SOURCE.result_projectcount rpc on rpc.id=r.id
    left outer join SOURCE.result_fundercount rfc on rfc.id=r.id
group by
    r.green,
    r.gold,
    case when rl.type is not null then true else false end,
    case when pids.pid is not null then true else false end,
    case when r.access_mode in ('Open Access', 'Open Source') then true else false end,
    r.peer_reviewed,
    r.type,
    r.abstract,
    rln.cc_licence,
    r.authors > 1,
    rpc.count > 1,
    rfc.count > 1,
    p.funder,
    c.code,
    c.name;
-- Table statistics are intentionally not computed in this script.
-- The compute stats statement is Impala-only and this file is now executed
-- with hive -f, so it must not appear here as a live statement.
-- After this script finishes, the driver shell script runs invalidate metadata
-- and then compute stats through impala-shell for every table listed by
-- show tables in TARGET, which covers all result_affiliated_* and
-- result_deposited_* tables created above.