From 8b681dcf1b99cd7fa0c1046abe52b3b35622bf83 Mon Sep 17 00:00:00 2001 From: antleb Date: Sat, 18 Sep 2021 00:35:14 +0300 Subject: [PATCH] attempt to make the observatory wf run in hive --- .../oa/graph/stats/oozie_app/observatory.sh | 4 +- .../scripts/step21-createObservatoryDB.sql | 840 +++++++++++------- 2 files changed, 527 insertions(+), 317 deletions(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/observatory.sh b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/observatory.sh index ff03bca03..7db8d40a5 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/observatory.sh +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/observatory.sh @@ -18,7 +18,9 @@ echo "Creating observatory database" impala-shell -q "drop database if exists ${TARGET} cascade" impala-shell -q "create database if not exists ${TARGET}" impala-shell -d ${SOURCE} -q "show tables" --delimited | sed "s/\(.*\)/create view ${TARGET}.\1 as select * from ${SOURCE}.\1;/" | impala-shell -f - -cat step21-createObservatoryDB.sql | sed s/SOURCE/$1/g | sed s/TARGET/$2/g1 | impala-shell -f - +cat step21-createObservatoryDB.sql | sed s/SOURCE/$1/g | sed s/TARGET/$2/g1 | hive -f - +impala-shell -q "invalidate metadata;" +impala-shell -d ${TARGET} -q "show tables" --delimited | sed "s/\(.*\)/compute stats ${TARGET}.\1;/" | impala-shell -f - echo "Impala shell finished" echo "Updating shadow observatory database" diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step21-createObservatoryDB.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step21-createObservatoryDB.sql index d71978a30..f17b5358f 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step21-createObservatoryDB.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step21-createObservatoryDB.sql @@ -1,372 +1,580 @@ -create table TARGET.result_affiliated_country stored as parquet as -select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, - case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, - r.peer_reviewed, coalesce(rln.count, 0) > 0 as cc_licence, r.abstract as abstract, r.type, c.code as ccode, c.name as cname +create table TARGET.result_cc_licence stored as parquet as +select r.id, coalesce(rln.count, 0) > 0 as cc_licence from SOURCE.result r -join SOURCE.result_organization ro on ro.id=r.id -join SOURCE.organization o on o.id=ro.organization -join SOURCE.country c on c.code=o.country and c.continent_name='Europe' -left outer join SOURCE.result_licenses rl on rl.id=r.id -left outer join SOURCE.result_pids pids on pids.id=r.id -left outer join ( + left outer join ( select rl.id, sum(case when lower(rln.normalized) like 'cc-%' then 1 else 0 end) as count from SOURCE.result_licenses rl - left outer join SOURCE.licenses_normalized rln on rl.type=rln.license + left outer join SOURCE.licenses_normalized rln on rl.type=rln.license group by rl.id -) rln on rln.id=r.id -group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, c.code, c.name; +) rln on rln.id=r.id; + +create table TARGET.result_affiliated_country stored as parquet as +select + count(distinct r.id) as total, + r.green, + r.gold, + case when rl.type is not null then true else false end as licence, + case when pids.pid is not null then true else false end as pid, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, + r.peer_reviewed, + rln.cc_licence, + r.abstract as abstract, + r.authors > 1 as multiple_authors, + rpc.count > 1 as multiple_projects, + rfc.count > 1 as multiple_funders, + r.type, + c.code as ccode, c.name as cname +from SOURCE.result r + join SOURCE.result_organization ro on ro.id=r.id + join SOURCE.organization o on o.id=ro.organization + join SOURCE.country c on c.code=o.country and c.continent_name='Europe' + left outer join SOURCE.result_licenses rl on rl.id=r.id + left outer join SOURCE.result_pids pids on pids.id=r.id + left outer join SOURCE.result_cc_licence rln on rln.id=r.id + left outer join SOURCE.result_projectcount rpc on rpc.id=r.id + left outer join SOURCE.result_fundercount rfc on rfc.id=r.id +group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, + cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, c.code, c.name; create table TARGET.result_affiliated_year stored as parquet as -select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, - case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, r.peer_reviewed, - coalesce(rln.count, 0) > 0 as cc_licence, r.abstract as abstract, r.type, r.year +select + count(distinct r.id) as total, + r.green, + r.gold, + case when rl.type is not null then true else false end as licence, + case when pids.pid is not null then true else false end as pid, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, + r.peer_reviewed, + rln.cc_licence, + r.abstract as abstract, + r.authors > 1 as multiple_authors, + rpc.count > 1 as multiple_projects, + rfc.count > 1 as multiple_funders, + r.type, + r.year from SOURCE.result r -join SOURCE.result_organization ro on ro.id=r.id -join SOURCE.organization o on o.id=ro.organization -join SOURCE.country c on c.code=o.country and c.continent_name='Europe' -left outer join SOURCE.result_licenses rl on rl.id=r.id -left outer join SOURCE.result_pids pids on pids.id=r.id -left outer join ( - select rl.id, sum(case when lower(rln.normalized) like 'cc-%' then 1 else 0 end) as count - from SOURCE.result_licenses rl - left outer join SOURCE.licenses_normalized rln on rl.type=rln.license - group by rl.id -) rln on rln.id=r.id -group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, r.year; + join SOURCE.result_organization ro on ro.id=r.id + join SOURCE.organization o on o.id=ro.organization + join SOURCE.country c on c.code=o.country and c.continent_name='Europe' + left outer join SOURCE.result_licenses rl on rl.id=r.id + left outer join SOURCE.result_pids pids on pids.id=r.id + left outer join SOURCE.result_cc_licence rln on rln.id=r.id + left outer join SOURCE.result_projectcount rpc on rpc.id=r.id + left outer join SOURCE.result_fundercount rfc on rfc.id=r.id +group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, + cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, r.year; create table TARGET.result_affiliated_year_country stored as parquet as -select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, - case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, - r.peer_reviewed, coalesce(rln.count, 0) > 0 as cc_licence, r.abstract as abstract, r.type, r.year, c.code as ccode, c.name as cname +select + count(distinct r.id) as total, + r.green, + r.gold, + case when rl.type is not null then true else false end as licence, + case when pids.pid is not null then true else false end as pid, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, + r.peer_reviewed, + rln.cc_licence, + r.abstract as abstract, + r.authors > 1 as multiple_authors, + rpc.count > 1 as multiple_projects, + rfc.count > 1 as multiple_funders, + r.type, + r.year, c.code as ccode, c.name as cname from SOURCE.result r -join SOURCE.result_organization ro on ro.id=r.id -join SOURCE.organization o on o.id=ro.organization -join SOURCE.country c on c.code=o.country and c.continent_name='Europe' -left outer join SOURCE.result_licenses rl on rl.id=r.id -left outer join SOURCE.result_pids pids on pids.id=r.id -left outer join ( - select rl.id, sum(case when lower(rln.normalized) like 'cc-%' then 1 else 0 end) as count - from SOURCE.result_licenses rl - left outer join SOURCE.licenses_normalized rln on rl.type=rln.license - group by rl.id -) rln on rln.id=r.id -group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, r.year, c.code, c.name; + join SOURCE.result_organization ro on ro.id=r.id + join SOURCE.organization o on o.id=ro.organization + join SOURCE.country c on c.code=o.country and c.continent_name='Europe' + left outer join SOURCE.result_licenses rl on rl.id=r.id + left outer join SOURCE.result_pids pids on pids.id=r.id + left outer join SOURCE.result_cc_licence rln on rln.id=r.id + left outer join SOURCE.result_projectcount rpc on rpc.id=r.id + left outer join SOURCE.result_fundercount rfc on rfc.id=r.id +group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, + cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, r.year, c.code, c.name; create table TARGET.result_affiliated_datasource stored as parquet as -select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, - case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, r.peer_reviewed, - coalesce(rln.count, 0) > 0 as cc_licence, r.abstract as abstract, r.type, d.name as dname +select + count(distinct r.id) as total, + r.green, + r.gold, + case when rl.type is not null then true else false end as licence, + case when pids.pid is not null then true else false end as pid, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, + r.peer_reviewed, + rln.cc_licence, + r.abstract as abstract, + r.authors > 1 as multiple_authors, + rpc.count > 1 as multiple_projects, + rfc.count > 1 as multiple_funders, + r.type, + d.name as dname from SOURCE.result r -join SOURCE.result_organization ro on ro.id=r.id -join SOURCE.organization o on o.id=ro.organization -join SOURCE.country c on c.code=o.country and c.continent_name='Europe' -left outer join SOURCE.result_datasources rd on rd.id=r.id -left outer join SOURCE.datasource d on d.id=rd.datasource -left outer join SOURCE.result_licenses rl on rl.id=r.id -left outer join SOURCE.result_pids pids on pids.id=r.id -left outer join ( - select rl.id, sum(case when lower(rln.normalized) like 'cc-%' then 1 else 0 end) as count - from SOURCE.result_licenses rl - left outer join SOURCE.licenses_normalized rln on rl.type=rln.license - group by rl.id -) rln on rln.id=r.id -group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, d.name; + join SOURCE.result_organization ro on ro.id=r.id + join SOURCE.organization o on o.id=ro.organization + join SOURCE.country c on c.code=o.country and c.continent_name='Europe' + left outer join SOURCE.result_datasources rd on rd.id=r.id + left outer join SOURCE.datasource d on d.id=rd.datasource + left outer join SOURCE.result_licenses rl on rl.id=r.id + left outer join SOURCE.result_pids pids on pids.id=r.id + left outer join SOURCE.result_cc_licence rln on rln.id=r.id + left outer join SOURCE.result_projectcount rpc on rpc.id=r.id + left outer join SOURCE.result_fundercount rfc on rfc.id=r.id +group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, + cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, d.name; create table TARGET.result_affiliated_datasource_country stored as parquet as -select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, - case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, - r.peer_reviewed, coalesce(rln.count, 0) > 0 as cc_licence, r.abstract as abstract, r.type, d.name as dname, c.code as ccode, c.name as cname +select + count(distinct r.id) as total, + r.green, + r.gold, + case when rl.type is not null then true else false end as licence, + case when pids.pid is not null then true else false end as pid, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, + r.peer_reviewed, + rln.cc_licence, + r.abstract as abstract, + r.authors > 1 as multiple_authors, + rpc.count > 1 as multiple_projects, + rfc.count > 1 as multiple_funders, + r.type, + d.name as dname, c.code as ccode, c.name as cname from SOURCE.result r -join SOURCE.result_organization ro on ro.id=r.id -join SOURCE.organization o on o.id=ro.organization -join SOURCE.country c on c.code=o.country and c.continent_name='Europe' -left outer join SOURCE.result_datasources rd on rd.id=r.id -left outer join SOURCE.datasource d on d.id=rd.datasource -left outer join SOURCE.result_licenses rl on rl.id=r.id -left outer join SOURCE.result_pids pids on pids.id=r.id -left outer join ( - select rl.id, sum(case when lower(rln.normalized) like 'cc-%' then 1 else 0 end) as count - from SOURCE.result_licenses rl - left outer join SOURCE.licenses_normalized rln on rl.type=rln.license - group by rl.id -) rln on rln.id=r.id -group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, d.name, c.code, c.name; + join SOURCE.result_organization ro on ro.id=r.id + join SOURCE.organization o on o.id=ro.organization + join SOURCE.country c on c.code=o.country and c.continent_name='Europe' + left outer join SOURCE.result_datasources rd on rd.id=r.id + left outer join SOURCE.datasource d on d.id=rd.datasource + left outer join SOURCE.result_licenses rl on rl.id=r.id + left outer join SOURCE.result_pids pids on pids.id=r.id + left outer join SOURCE.result_cc_licence rln on rln.id=r.id + left outer join SOURCE.result_projectcount rpc on rpc.id=r.id + left outer join SOURCE.result_fundercount rfc on rfc.id=r.id +group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, + cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, d.name, c.code, c.name; create table TARGET.result_affiliated_organization stored as parquet as -select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, - case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, - r.peer_reviewed, coalesce(rln.count, 0) > 0 as cc_licence, r.abstract as abstract, r.type, o.name as oname +select + count(distinct r.id) as total, + r.green, + r.gold, + case when rl.type is not null then true else false end as licence, + case when pids.pid is not null then true else false end as pid, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, + r.peer_reviewed, + rln.cc_licence, + r.abstract as abstract, + r.authors > 1 as multiple_authors, + rpc.count > 1 as multiple_projects, + rfc.count > 1 as multiple_funders, + r.type, + o.name as oname from SOURCE.result r -join SOURCE.result_organization ro on ro.id=r.id -join SOURCE.organization o on o.id=ro.organization -join SOURCE.country c on c.code=o.country and c.continent_name='Europe' -left outer join SOURCE.result_licenses rl on rl.id=r.id -left outer join SOURCE.result_pids pids on pids.id=r.id -left outer join ( - select rl.id, sum(case when lower(rln.normalized) like 'cc-%' then 1 else 0 end) as count - from SOURCE.result_licenses rl - left outer join SOURCE.licenses_normalized rln on rl.type=rln.license - group by rl.id -) rln on rln.id=r.id -group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, o.name; + join SOURCE.result_organization ro on ro.id=r.id + join SOURCE.organization o on o.id=ro.organization + join SOURCE.country c on c.code=o.country and c.continent_name='Europe' + left outer join SOURCE.result_licenses rl on rl.id=r.id + left outer join SOURCE.result_pids pids on pids.id=r.id + left outer join SOURCE.result_cc_licence rln on rln.id=r.id + left outer join SOURCE.result_projectcount rpc on rpc.id=r.id + left outer join SOURCE.result_fundercount rfc on rfc.id=r.id +group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, + cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, o.name; create table TARGET.result_affiliated_organization_country stored as parquet as -select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, - case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, - r.peer_reviewed, coalesce(rln.count, 0) > 0 as cc_licence, r.abstract as abstract, r.type, o.name as oname, c.code as ccode, c.name as cname +select + count(distinct r.id) as total, + r.green, + r.gold, + case when rl.type is not null then true else false end as licence, + case when pids.pid is not null then true else false end as pid, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, + r.peer_reviewed, + rln.cc_licence, + r.abstract as abstract, + r.authors > 1 as multiple_authors, + rpc.count > 1 as multiple_projects, + rfc.count > 1 as multiple_funders, + r.type, + o.name as oname, c.code as ccode, c.name as cname from SOURCE.result r -join SOURCE.result_organization ro on ro.id=r.id -join SOURCE.organization o on o.id=ro.organization -join SOURCE.country c on c.code=o.country and c.continent_name='Europe' -left outer join SOURCE.result_licenses rl on rl.id=r.id -left outer join SOURCE.result_pids pids on pids.id=r.id -left outer join ( - select rl.id, sum(case when lower(rln.normalized) like 'cc-%' then 1 else 0 end) as count - from SOURCE.result_licenses rl - left outer join SOURCE.licenses_normalized rln on rl.type=rln.license - group by rl.id -) rln on rln.id=r.id -group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, o.name, c.code, c.name; + join SOURCE.result_organization ro on ro.id=r.id + join SOURCE.organization o on o.id=ro.organization + join SOURCE.country c on c.code=o.country and c.continent_name='Europe' + left outer join SOURCE.result_licenses rl on rl.id=r.id + left outer join SOURCE.result_pids pids on pids.id=r.id + left outer join SOURCE.result_cc_licence rln on rln.id=r.id + left outer join SOURCE.result_projectcount rpc on rpc.id=r.id + left outer join SOURCE.result_fundercount rfc on rfc.id=r.id +group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, + cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, o.name, c.code, c.name; create table TARGET.result_affiliated_funder stored as parquet as -select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, - case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, r.peer_reviewed, - coalesce(rln.count, 0) > 0 as cc_licence, r.abstract as abstract, r.type, p.funder as pfunder +select + count(distinct r.id) as total, + r.green, + r.gold, + case when rl.type is not null then true else false end as licence, + case when pids.pid is not null then true else false end as pid, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, + r.peer_reviewed, + rln.cc_licence, + r.abstract as abstract, + r.authors > 1 as multiple_authors, + rpc.count > 1 as multiple_projects, + rfc.count > 1 as multiple_funders, + r.type, + p.funder as pfunder from SOURCE.result r -join SOURCE.result_organization ro on ro.id=r.id -join SOURCE.organization o on o.id=ro.organization -join SOURCE.country c on c.code=o.country and c.continent_name='Europe' -join SOURCE.result_projects rp on rp.id=r.id -join SOURCE.project p on p.id=rp.project -left outer join SOURCE.result_licenses rl on rl.id=r.id -left outer join SOURCE.result_pids pids on pids.id=r.id -left outer join ( - select rl.id, sum(case when lower(rln.normalized) like 'cc-%' then 1 else 0 end) as count - from SOURCE.result_licenses rl - left outer join SOURCE.licenses_normalized rln on rl.type=rln.license - group by rl.id -) rln on rln.id=r.id -group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, p.funder; + join SOURCE.result_organization ro on ro.id=r.id + join SOURCE.organization o on o.id=ro.organization + join SOURCE.country c on c.code=o.country and c.continent_name='Europe' + join SOURCE.result_projects rp on rp.id=r.id + join SOURCE.project p on p.id=rp.project + left outer join SOURCE.result_licenses rl on rl.id=r.id + left outer join SOURCE.result_pids pids on pids.id=r.id + left outer join SOURCE.result_cc_licence rln on rln.id=r.id + left outer join SOURCE.result_projectcount rpc on rpc.id=r.id + left outer join SOURCE.result_fundercount rfc on rfc.id=r.id +group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, + cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, p.funder; create table TARGET.result_affiliated_funder_country stored as parquet as -select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, - case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, - r.peer_reviewed, coalesce(rln.count, 0) > 0 as cc_licence, r.abstract as abstract, r.type, p.funder as pfunder, c.code as ccode, c.name as cname +select + count(distinct r.id) as total, + r.green, + r.gold, + case when rl.type is not null then true else false end as licence, + case when pids.pid is not null then true else false end as pid, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, + r.peer_reviewed, + rln.cc_licence, + r.abstract as abstract, + r.authors > 1 as multiple_authors, + rpc.count > 1 as multiple_projects, + rfc.count > 1 as multiple_funders, + r.type, + p.funder as pfunder, c.code as ccode, c.name as cname from SOURCE.result r -join SOURCE.result_organization ro on ro.id=r.id -join SOURCE.organization o on o.id=ro.organization -join SOURCE.country c on c.code=o.country and c.continent_name='Europe' -join SOURCE.result_projects rp on rp.id=r.id -join SOURCE.project p on p.id=rp.project -left outer join SOURCE.result_licenses rl on rl.id=r.id -left outer join SOURCE.result_pids pids on pids.id=r.id -left outer join ( - select rl.id, sum(case when lower(rln.normalized) like 'cc-%' then 1 else 0 end) as count - from SOURCE.result_licenses rl - left outer join SOURCE.licenses_normalized rln on rl.type=rln.license - group by rl.id -) rln on rln.id=r.id -group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, p.funder, c.code, c.name; + join SOURCE.result_organization ro on ro.id=r.id + join SOURCE.organization o on o.id=ro.organization + join SOURCE.country c on c.code=o.country and c.continent_name='Europe' + join SOURCE.result_projects rp on rp.id=r.id + join SOURCE.project p on p.id=rp.project + left outer join SOURCE.result_licenses rl on rl.id=r.id + left outer join SOURCE.result_pids pids on pids.id=r.id + left outer join SOURCE.result_cc_licence rln on rln.id=r.id + left outer join SOURCE.result_projectcount rpc on rpc.id=r.id + left outer join SOURCE.result_fundercount rfc on rfc.id=r.id +group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, + cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, p.funder, c.code, c.name; create table TARGET.result_deposited_country stored as parquet as -select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, - case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, - r.peer_reviewed, coalesce(rln.count, 0) > 0 as cc_licence, r.abstract as abstract, r.type, c.code as ccode, c.name as cname +select + count(distinct r.id) as total, + r.green, + r.gold, + case when rl.type is not null then true else false end as licence, + case when pids.pid is not null then true else false end as pid, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, + r.peer_reviewed, + rln.cc_licence, + r.abstract as abstract, + r.authors > 1 as multiple_authors, + rpc.count > 1 as multiple_projects, + rfc.count > 1 as multiple_funders, + r.type, + c.code as ccode, c.name as cname from SOURCE.result r -join SOURCE.result_datasources rd on rd.id=r.id -join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') -join SOURCE.datasource_organizations dor on dor.id=d.id -join SOURCE.organization o on o.id=dor.organization -join SOURCE.country c on c.code=o.country and c.continent_name='Europe' -left outer join SOURCE.result_licenses rl on rl.id=r.id -left outer join SOURCE.result_pids pids on pids.id=r.id -left outer join ( - select rl.id, sum(case when lower(rln.normalized) like 'cc-%' then 1 else 0 end) as count - from SOURCE.result_licenses rl - left outer join SOURCE.licenses_normalized rln on rl.type=rln.license - group by rl.id -) rln on rln.id=r.id -group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, c.code, c.name; + join SOURCE.result_datasources rd on rd.id=r.id + join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') + join SOURCE.datasource_organizations dor on dor.id=d.id + join SOURCE.organization o on o.id=dor.organization + join SOURCE.country c on c.code=o.country and c.continent_name='Europe' + left outer join SOURCE.result_licenses rl on rl.id=r.id + left outer join SOURCE.result_pids pids on pids.id=r.id + left outer join SOURCE.result_cc_licence rln on rln.id=r.id + left outer join SOURCE.result_projectcount rpc on rpc.id=r.id + left outer join SOURCE.result_fundercount rfc on rfc.id=r.id +group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, + cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, c.code, c.name; create table TARGET.result_deposited_year stored as parquet as -select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, - case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, r.peer_reviewed, - coalesce(rln.count, 0) > 0 as cc_licence, r.abstract as abstract, r.type, r.year +select + count(distinct r.id) as total, + r.green, + r.gold, + case when rl.type is not null then true else false end as licence, + case when pids.pid is not null then true else false end as pid, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, + r.peer_reviewed, + rln.cc_licence, + r.abstract as abstract, + r.authors > 1 as multiple_authors, + rpc.count > 1 as multiple_projects, + rfc.count > 1 as multiple_funders, + r.type, + r.year from SOURCE.result r -join SOURCE.result_datasources rd on rd.id=r.id -join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') -join SOURCE.datasource_organizations dor on dor.id=d.id -join SOURCE.organization o on o.id=dor.organization -join SOURCE.country c on c.code=o.country and c.continent_name='Europe' -left outer join SOURCE.result_licenses rl on rl.id=r.id -left outer join SOURCE.result_pids pids on pids.id=r.id -left outer join ( - select rl.id, sum(case when lower(rln.normalized) like 'cc-%' then 1 else 0 end) as count - from SOURCE.result_licenses rl - left outer join SOURCE.licenses_normalized rln on rl.type=rln.license - group by rl.id -) rln on rln.id=r.id -group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, r.year; + join SOURCE.result_datasources rd on rd.id=r.id + join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') + join SOURCE.datasource_organizations dor on dor.id=d.id + join SOURCE.organization o on o.id=dor.organization + join SOURCE.country c on c.code=o.country and c.continent_name='Europe' + left outer join SOURCE.result_licenses rl on rl.id=r.id + left outer join SOURCE.result_pids pids on pids.id=r.id + left outer join SOURCE.result_cc_licence rln on rln.id=r.id + left outer join SOURCE.result_projectcount rpc on rpc.id=r.id + left outer join SOURCE.result_fundercount rfc on rfc.id=r.id +group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, + cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, r.year; create table TARGET.result_deposited_year_country stored as parquet as -select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, - case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, - r.peer_reviewed, coalesce(rln.count, 0) > 0 as cc_licence, r.abstract as abstract, r.type, r.year, c.code as ccode, c.name as cname +select + count(distinct r.id) as total, + r.green, + r.gold, + case when rl.type is not null then true else false end as licence, + case when pids.pid is not null then true else false end as pid, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, + r.peer_reviewed, + rln.cc_licence, + r.abstract as abstract, + r.authors > 1 as multiple_authors, + rpc.count > 1 as multiple_projects, + rfc.count > 1 as multiple_funders, + r.type, + r.year, c.code as ccode, c.name as cname from SOURCE.result r -join SOURCE.result_datasources rd on rd.id=r.id -join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') -join SOURCE.datasource_organizations dor on dor.id=d.id -join SOURCE.organization o on o.id=dor.organization -join SOURCE.country c on c.code=o.country and c.continent_name='Europe' -left outer join SOURCE.result_licenses rl on rl.id=r.id -left outer join SOURCE.result_pids pids on pids.id=r.id -left outer join ( - select rl.id, sum(case when lower(rln.normalized) like 'cc-%' then 1 else 0 end) as count - from SOURCE.result_licenses rl - left outer join SOURCE.licenses_normalized rln on rl.type=rln.license - group by rl.id -) rln on rln.id=r.id -group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, r.year, c.code, c.name; + join SOURCE.result_datasources rd on rd.id=r.id + join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') + join SOURCE.datasource_organizations dor on dor.id=d.id + join SOURCE.organization o on o.id=dor.organization + join SOURCE.country c on c.code=o.country and c.continent_name='Europe' + left outer join SOURCE.result_licenses rl on rl.id=r.id + left outer join SOURCE.result_pids pids on pids.id=r.id + left outer join SOURCE.result_cc_licence rln on rln.id=r.id + left outer join SOURCE.result_projectcount rpc on rpc.id=r.id + left outer join SOURCE.result_fundercount rfc on rfc.id=r.id +group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, + cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, r.year, c.code, c.name; create table TARGET.result_deposited_datasource stored as parquet as -select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, - case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, - r.peer_reviewed, coalesce(rln.count, 0) > 0 as cc_licence, r.abstract as abstract, r.type, d.name as dname +select + count(distinct r.id) as total, + r.green, + r.gold, + case when rl.type is not null then true else false end as licence, + case when pids.pid is not null then true else false end as pid, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, + r.peer_reviewed, + rln.cc_licence, + r.abstract as abstract, + r.authors > 1 as multiple_authors, + rpc.count > 1 as multiple_projects, + rfc.count > 1 as multiple_funders, + r.type, + d.name as dname from SOURCE.result r -join SOURCE.result_datasources rd on rd.id=r.id -join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') -join SOURCE.datasource_organizations dor on dor.id=d.id -join SOURCE.organization o on o.id=dor.organization -join SOURCE.country c on c.code=o.country and c.continent_name='Europe' -left outer join SOURCE.result_licenses rl on rl.id=r.id -left outer join SOURCE.result_pids pids on pids.id=r.id -left outer join ( - select rl.id, sum(case when lower(rln.normalized) like 'cc-%' then 1 else 0 end) as count - from SOURCE.result_licenses rl - left outer join SOURCE.licenses_normalized rln on rl.type=rln.license - group by rl.id -) rln on rln.id=r.id -group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, d.name; + join SOURCE.result_datasources rd on rd.id=r.id + join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') + join SOURCE.datasource_organizations dor on dor.id=d.id + join SOURCE.organization o on o.id=dor.organization + join SOURCE.country c on c.code=o.country and c.continent_name='Europe' + left outer join SOURCE.result_licenses rl on rl.id=r.id + left outer join SOURCE.result_pids pids on pids.id=r.id + left outer join SOURCE.result_cc_licence rln on rln.id=r.id + left outer join SOURCE.result_projectcount rpc on rpc.id=r.id + left outer join SOURCE.result_fundercount rfc on rfc.id=r.id +group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, + cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, d.name; create table TARGET.result_deposited_datasource_country stored as parquet as -select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, - case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, - r.peer_reviewed, coalesce(rln.count, 0) > 0 as cc_licence, r.abstract as abstract, r.type, d.name as dname, c.code as ccode, c.name as cname +select + count(distinct r.id) as total, + r.green, + r.gold, + case when rl.type is not null then true else false end as licence, + case when pids.pid is not null then true else false end as pid, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, + r.peer_reviewed, + rln.cc_licence, + r.abstract as abstract, + r.authors > 1 as multiple_authors, + rpc.count > 1 as multiple_projects, + rfc.count > 1 as multiple_funders, + r.type, + d.name as dname, c.code as ccode, c.name as cname from SOURCE.result r -join SOURCE.result_datasources rd on rd.id=r.id -join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') -join SOURCE.datasource_organizations dor on dor.id=d.id -join SOURCE.organization o on o.id=dor.organization -join SOURCE.country c on c.code=o.country and c.continent_name='Europe' -left outer join SOURCE.result_licenses rl on rl.id=r.id -left outer join SOURCE.result_pids pids on pids.id=r.id -left outer join ( - select rl.id, sum(case when lower(rln.normalized) like 'cc-%' then 1 else 0 end) as count - from SOURCE.result_licenses rl - left outer join SOURCE.licenses_normalized rln on rl.type=rln.license - group by rl.id -) rln on rln.id=r.id -group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, d.name, c.code, c.name; + join SOURCE.result_datasources rd on rd.id=r.id + join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') + join SOURCE.datasource_organizations dor on dor.id=d.id + join SOURCE.organization o on o.id=dor.organization + join SOURCE.country c on c.code=o.country and c.continent_name='Europe' + left outer join SOURCE.result_licenses rl on rl.id=r.id + left outer join SOURCE.result_pids pids on pids.id=r.id + left outer join SOURCE.result_cc_licence rln on rln.id=r.id + left outer join SOURCE.result_projectcount rpc on rpc.id=r.id + left outer join SOURCE.result_fundercount rfc on rfc.id=r.id +group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, + cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, d.name, c.code, c.name; create table TARGET.result_deposited_organization stored as parquet as -select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, - case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, r.peer_reviewed, - coalesce(rln.count, 0) > 0 as cc_licence, r.abstract as abstract, r.type, o.name as oname +select + count(distinct r.id) as total, + r.green, + r.gold, + case when rl.type is not null then true else false end as licence, + case when pids.pid is not null then true else false end as pid, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, + r.peer_reviewed, + rln.cc_licence, + r.abstract as abstract, + r.authors > 1 as multiple_authors, + rpc.count > 1 as multiple_projects, + rfc.count > 1 as multiple_funders, + r.type, + o.name as oname from SOURCE.result r -join SOURCE.result_datasources rd on rd.id=r.id -join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') -join SOURCE.datasource_organizations dor on dor.id=d.id -join SOURCE.organization o on o.id=dor.organization -join SOURCE.country c on c.code=o.country and c.continent_name='Europe' -left outer join SOURCE.result_licenses rl on rl.id=r.id -left outer join SOURCE.result_pids pids on pids.id=r.id -left outer join ( - select rl.id, sum(case when lower(rln.normalized) like 'cc-%' then 1 else 0 end) as count - from SOURCE.result_licenses rl - left outer join SOURCE.licenses_normalized rln on rl.type=rln.license - group by rl.id -) rln on rln.id=r.id -group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, o.name; + join SOURCE.result_datasources rd on rd.id=r.id + join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') + join SOURCE.datasource_organizations dor on dor.id=d.id + join SOURCE.organization o on o.id=dor.organization + join SOURCE.country c on c.code=o.country and c.continent_name='Europe' + left outer join SOURCE.result_licenses rl on rl.id=r.id + left outer join SOURCE.result_pids pids on pids.id=r.id + left outer join SOURCE.result_cc_licence rln on rln.id=r.id + left outer join SOURCE.result_projectcount rpc on rpc.id=r.id + left outer join SOURCE.result_fundercount rfc on rfc.id=r.id +group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, + cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, o.name; create table TARGET.result_deposited_organization_country stored as parquet as -select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, - case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, - r.peer_reviewed, coalesce(rln.count, 0) > 0 as cc_licence, r.abstract as abstract, r.type, o.name as oname, c.code as ccode, c.name as cname +select + count(distinct r.id) as total, + r.green, + r.gold, + case when rl.type is not null then true else false end as licence, + case when pids.pid is not null then true else false end as pid, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, + r.peer_reviewed, + rln.cc_licence, + r.abstract as abstract, + r.authors > 1 as multiple_authors, + rpc.count > 1 as multiple_projects, + rfc.count > 1 as multiple_funders, + r.type, + o.name as oname, c.code as ccode, c.name as cname from SOURCE.result r -join SOURCE.result_datasources rd on rd.id=r.id -join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') -join SOURCE.datasource_organizations dor on dor.id=d.id -join SOURCE.organization o on o.id=dor.organization -join SOURCE.country c on c.code=o.country and c.continent_name='Europe' -left outer join SOURCE.result_licenses rl on rl.id=r.id -left outer join SOURCE.result_pids pids on pids.id=r.id -left outer join ( - select rl.id, sum(case when lower(rln.normalized) like 'cc-%' then 1 else 0 end) as count - from SOURCE.result_licenses rl - left outer join SOURCE.licenses_normalized rln on rl.type=rln.license - group by rl.id -) rln on rln.id=r.id -group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, o.name, c.code, c.name; + join SOURCE.result_datasources rd on rd.id=r.id + join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') + join SOURCE.datasource_organizations dor on dor.id=d.id + join SOURCE.organization o on o.id=dor.organization + join SOURCE.country c on c.code=o.country and c.continent_name='Europe' + left outer join SOURCE.result_licenses rl on rl.id=r.id + left outer join SOURCE.result_pids pids on pids.id=r.id + left outer join SOURCE.result_cc_licence rln on rln.id=r.id + left outer join SOURCE.result_projectcount rpc on rpc.id=r.id + left outer join SOURCE.result_fundercount rfc on rfc.id=r.id +group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, + cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, o.name, c.code, c.name; create table TARGET.result_deposited_funder stored as parquet as -select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, - case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, - r.peer_reviewed, coalesce(rln.count, 0) > 0 as cc_licence, r.abstract as abstract, r.type, p.funder as pfunder +select + count(distinct r.id) as total, + r.green, + r.gold, + case when rl.type is not null then true else false end as licence, + case when pids.pid is not null then true else false end as pid, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, + r.peer_reviewed, + rln.cc_licence, + r.abstract as abstract, + r.authors > 1 as multiple_authors, + rpc.count > 1 as multiple_projects, + rfc.count > 1 as multiple_funders, + r.type, + p.funder as pfunder from SOURCE.result r -join SOURCE.result_datasources rd on rd.id=r.id -join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') -join SOURCE.datasource_organizations dor on dor.id=d.id -join SOURCE.organization o on o.id=dor.organization -join SOURCE.country c on c.code=o.country and c.continent_name='Europe' -join SOURCE.result_projects rp on rp.id=r.id -join SOURCE.project p on p.id=rp.project -left outer join SOURCE.result_licenses rl on rl.id=r.id -left outer join SOURCE.result_pids pids on pids.id=r.id -left outer join ( - select rl.id, sum(case when lower(rln.normalized) like 'cc-%' then 1 else 0 end) as count - from SOURCE.result_licenses rl - left outer join SOURCE.licenses_normalized rln on rl.type=rln.license - group by rl.id -) rln on rln.id=r.id -group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, p.funder; + join SOURCE.result_datasources rd on rd.id=r.id + join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') + join SOURCE.datasource_organizations dor on dor.id=d.id + join SOURCE.organization o on o.id=dor.organization + join SOURCE.country c on c.code=o.country and c.continent_name='Europe' + join SOURCE.result_projects rp on rp.id=r.id + join SOURCE.project p on p.id=rp.project + left outer join SOURCE.result_licenses rl on rl.id=r.id + left outer join SOURCE.result_pids pids on pids.id=r.id + left outer join SOURCE.result_cc_licence rln on rln.id=r.id + left outer join SOURCE.result_projectcount rpc on rpc.id=r.id + left outer join SOURCE.result_fundercount rfc on rfc.id=r.id +group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, + cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, p.funder; create table TARGET.result_deposited_funder_country stored as parquet as -select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, - case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, - r.peer_reviewed, coalesce(rln.count, 0) > 0 as cc_licence, r.abstract as abstract, r.type, p.funder as pfunder, c.code as ccode, c.name as cname +select + count(distinct r.id) as total, + r.green, + r.gold, + case when rl.type is not null then true else false end as licence, + case when pids.pid is not null then true else false end as pid, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, + r.peer_reviewed, + rln.cc_licence, + r.abstract as abstract, + r.authors > 1 as multiple_authors, + rpc.count > 1 as multiple_projects, + rfc.count > 1 as multiple_funders, + r.type, + p.funder as pfunder, c.code as ccode, c.name as cname from SOURCE.result r -join SOURCE.result_datasources rd on rd.id=r.id -join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') -join SOURCE.datasource_organizations dor on dor.id=d.id -join SOURCE.organization o on o.id=dor.organization -join SOURCE.country c on c.code=o.country and c.continent_name='Europe' -join SOURCE.result_projects rp on rp.id=r.id -join SOURCE.project p on p.id=rp.project -left outer join SOURCE.result_licenses rl on rl.id=r.id -left outer join SOURCE.result_pids pids on pids.id=r.id -left outer join ( - select rl.id, sum(case when lower(rln.normalized) like 'cc-%' then 1 else 0 end) as count - from SOURCE.result_licenses rl - left outer join SOURCE.licenses_normalized rln on rl.type=rln.license - group by rl.id -) rln on rln.id=r.id -group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, p.funder, c.code, c.name; + join SOURCE.result_datasources rd on rd.id=r.id + join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') + join SOURCE.datasource_organizations dor on dor.id=d.id + join SOURCE.organization o on o.id=dor.organization + join SOURCE.country c on c.code=o.country and c.continent_name='Europe' + join SOURCE.result_projects rp on rp.id=r.id + join SOURCE.project p on p.id=rp.project + left outer join SOURCE.result_licenses rl on rl.id=r.id + left outer join SOURCE.result_pids pids on pids.id=r.id + left outer join SOURCE.result_cc_licence rln on rln.id=r.id + left outer join SOURCE.result_projectcount rpc on rpc.id=r.id + left outer join SOURCE.result_fundercount rfc on rfc.id=r.id +group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, + cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, p.funder, c.code, c.name; -compute stats TARGET.result_affiliated_country; -compute stats TARGET.result_affiliated_year; -compute stats TARGET.result_affiliated_year_country; -compute stats TARGET.result_affiliated_datasource; -compute stats TARGET.result_affiliated_datasource_country; -compute stats TARGET.result_affiliated_organization; -compute stats TARGET.result_affiliated_organization_country; -compute stats TARGET.result_affiliated_funder; -compute stats TARGET.result_affiliated_funder_country; -compute stats TARGET.result_deposited_country; -compute stats TARGET.result_deposited_year; -compute stats TARGET.result_deposited_year_country; -compute stats TARGET.result_deposited_datasource; -compute stats TARGET.result_deposited_datasource_country; -compute stats TARGET.result_deposited_organization; -compute stats TARGET.result_deposited_organization_country; -compute stats TARGET.result_deposited_funder; -compute stats TARGET.result_deposited_funder_country; \ No newline at end of file +-- compute stats TARGET.result_affiliated_country; +-- compute stats TARGET.result_affiliated_year; +-- compute stats TARGET.result_affiliated_year_country; +-- compute stats TARGET.result_affiliated_datasource; +-- compute stats TARGET.result_affiliated_datasource_country; +-- compute stats TARGET.result_affiliated_organization; +-- compute stats TARGET.result_affiliated_organization_country; +-- compute stats TARGET.result_affiliated_funder; +-- compute stats TARGET.result_affiliated_funder_country; +-- compute stats TARGET.result_deposited_country; +-- compute stats TARGET.result_deposited_year; +-- compute stats TARGET.result_deposited_year_country; +-- compute stats TARGET.result_deposited_datasource; +-- compute stats TARGET.result_deposited_datasource_country; +-- compute stats TARGET.result_deposited_organization; +-- compute stats TARGET.result_deposited_organization_country; +-- compute stats TARGET.result_deposited_funder; +-- compute stats TARGET.result_deposited_funder_country; \ No newline at end of file