attempt to make the observatory wf run in hive
This commit is contained in:
parent
2943287d10
commit
8b681dcf1b
|
@ -18,7 +18,9 @@ echo "Creating observatory database"
|
||||||
impala-shell -q "drop database if exists ${TARGET} cascade"
|
impala-shell -q "drop database if exists ${TARGET} cascade"
|
||||||
impala-shell -q "create database if not exists ${TARGET}"
|
impala-shell -q "create database if not exists ${TARGET}"
|
||||||
impala-shell -d ${SOURCE} -q "show tables" --delimited | sed "s/\(.*\)/create view ${TARGET}.\1 as select * from ${SOURCE}.\1;/" | impala-shell -f -
|
impala-shell -d ${SOURCE} -q "show tables" --delimited | sed "s/\(.*\)/create view ${TARGET}.\1 as select * from ${SOURCE}.\1;/" | impala-shell -f -
|
||||||
cat step21-createObservatoryDB.sql | sed s/SOURCE/$1/g | sed s/TARGET/$2/g1 | impala-shell -f -
|
cat step21-createObservatoryDB.sql | sed s/SOURCE/$1/g | sed s/TARGET/$2/g1 | hive -f -
|
||||||
|
impala-shell -q "invalidate metadata;"
|
||||||
|
impala-shell -d ${TARGET} -q "show tables" --delimited | sed "s/\(.*\)/compute stats ${TARGET}.\1;/" | impala-shell -f -
|
||||||
echo "Impala shell finished"
|
echo "Impala shell finished"
|
||||||
|
|
||||||
echo "Updating shadow observatory database"
|
echo "Updating shadow observatory database"
|
||||||
|
|
|
@ -1,61 +1,116 @@
|
||||||
|
create table TARGET.result_cc_licence stored as parquet as
|
||||||
|
select r.id, coalesce(rln.count, 0) > 0 as cc_licence
|
||||||
|
from SOURCE.result r
|
||||||
|
left outer join (
|
||||||
|
select rl.id, sum(case when lower(rln.normalized) like 'cc-%' then 1 else 0 end) as count
|
||||||
|
from SOURCE.result_licenses rl
|
||||||
|
left outer join SOURCE.licenses_normalized rln on rl.type=rln.license
|
||||||
|
group by rl.id
|
||||||
|
) rln on rln.id=r.id;
|
||||||
|
|
||||||
create table TARGET.result_affiliated_country stored as parquet as
|
create table TARGET.result_affiliated_country stored as parquet as
|
||||||
select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
|
select
|
||||||
case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
|
count(distinct r.id) as total,
|
||||||
r.peer_reviewed, coalesce(rln.count, 0) > 0 as cc_licence, r.abstract as abstract, r.type, c.code as ccode, c.name as cname
|
r.green,
|
||||||
|
r.gold,
|
||||||
|
case when rl.type is not null then true else false end as licence,
|
||||||
|
case when pids.pid is not null then true else false end as pid,
|
||||||
|
case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
|
||||||
|
r.peer_reviewed,
|
||||||
|
rln.cc_licence,
|
||||||
|
r.abstract as abstract,
|
||||||
|
r.authors > 1 as multiple_authors,
|
||||||
|
rpc.count > 1 as multiple_projects,
|
||||||
|
rfc.count > 1 as multiple_funders,
|
||||||
|
r.type,
|
||||||
|
c.code as ccode, c.name as cname
|
||||||
from SOURCE.result r
|
from SOURCE.result r
|
||||||
join SOURCE.result_organization ro on ro.id=r.id
|
join SOURCE.result_organization ro on ro.id=r.id
|
||||||
join SOURCE.organization o on o.id=ro.organization
|
join SOURCE.organization o on o.id=ro.organization
|
||||||
join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
|
join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
|
||||||
left outer join SOURCE.result_licenses rl on rl.id=r.id
|
left outer join SOURCE.result_licenses rl on rl.id=r.id
|
||||||
left outer join SOURCE.result_pids pids on pids.id=r.id
|
left outer join SOURCE.result_pids pids on pids.id=r.id
|
||||||
left outer join (
|
left outer join SOURCE.result_cc_licence rln on rln.id=r.id
|
||||||
select rl.id, sum(case when lower(rln.normalized) like 'cc-%' then 1 else 0 end) as count
|
left outer join SOURCE.result_projectcount rpc on rpc.id=r.id
|
||||||
from SOURCE.result_licenses rl
|
left outer join SOURCE.result_fundercount rfc on rfc.id=r.id
|
||||||
left outer join SOURCE.licenses_normalized rln on rl.type=rln.license
|
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
||||||
group by rl.id
|
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
||||||
) rln on rln.id=r.id
|
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, c.code, c.name;
|
||||||
group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, c.code, c.name;
|
|
||||||
|
|
||||||
create table TARGET.result_affiliated_year stored as parquet as
|
create table TARGET.result_affiliated_year stored as parquet as
|
||||||
select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
|
select
|
||||||
case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, r.peer_reviewed,
|
count(distinct r.id) as total,
|
||||||
coalesce(rln.count, 0) > 0 as cc_licence, r.abstract as abstract, r.type, r.year
|
r.green,
|
||||||
|
r.gold,
|
||||||
|
case when rl.type is not null then true else false end as licence,
|
||||||
|
case when pids.pid is not null then true else false end as pid,
|
||||||
|
case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
|
||||||
|
r.peer_reviewed,
|
||||||
|
rln.cc_licence,
|
||||||
|
r.abstract as abstract,
|
||||||
|
r.authors > 1 as multiple_authors,
|
||||||
|
rpc.count > 1 as multiple_projects,
|
||||||
|
rfc.count > 1 as multiple_funders,
|
||||||
|
r.type,
|
||||||
|
r.year
|
||||||
from SOURCE.result r
|
from SOURCE.result r
|
||||||
join SOURCE.result_organization ro on ro.id=r.id
|
join SOURCE.result_organization ro on ro.id=r.id
|
||||||
join SOURCE.organization o on o.id=ro.organization
|
join SOURCE.organization o on o.id=ro.organization
|
||||||
join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
|
join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
|
||||||
left outer join SOURCE.result_licenses rl on rl.id=r.id
|
left outer join SOURCE.result_licenses rl on rl.id=r.id
|
||||||
left outer join SOURCE.result_pids pids on pids.id=r.id
|
left outer join SOURCE.result_pids pids on pids.id=r.id
|
||||||
left outer join (
|
left outer join SOURCE.result_cc_licence rln on rln.id=r.id
|
||||||
select rl.id, sum(case when lower(rln.normalized) like 'cc-%' then 1 else 0 end) as count
|
left outer join SOURCE.result_projectcount rpc on rpc.id=r.id
|
||||||
from SOURCE.result_licenses rl
|
left outer join SOURCE.result_fundercount rfc on rfc.id=r.id
|
||||||
left outer join SOURCE.licenses_normalized rln on rl.type=rln.license
|
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
||||||
group by rl.id
|
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
||||||
) rln on rln.id=r.id
|
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, r.year;
|
||||||
group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, r.year;
|
|
||||||
|
|
||||||
create table TARGET.result_affiliated_year_country stored as parquet as
|
create table TARGET.result_affiliated_year_country stored as parquet as
|
||||||
select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
|
select
|
||||||
case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
|
count(distinct r.id) as total,
|
||||||
r.peer_reviewed, coalesce(rln.count, 0) > 0 as cc_licence, r.abstract as abstract, r.type, r.year, c.code as ccode, c.name as cname
|
r.green,
|
||||||
|
r.gold,
|
||||||
|
case when rl.type is not null then true else false end as licence,
|
||||||
|
case when pids.pid is not null then true else false end as pid,
|
||||||
|
case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
|
||||||
|
r.peer_reviewed,
|
||||||
|
rln.cc_licence,
|
||||||
|
r.abstract as abstract,
|
||||||
|
r.authors > 1 as multiple_authors,
|
||||||
|
rpc.count > 1 as multiple_projects,
|
||||||
|
rfc.count > 1 as multiple_funders,
|
||||||
|
r.type,
|
||||||
|
r.year, c.code as ccode, c.name as cname
|
||||||
from SOURCE.result r
|
from SOURCE.result r
|
||||||
join SOURCE.result_organization ro on ro.id=r.id
|
join SOURCE.result_organization ro on ro.id=r.id
|
||||||
join SOURCE.organization o on o.id=ro.organization
|
join SOURCE.organization o on o.id=ro.organization
|
||||||
join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
|
join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
|
||||||
left outer join SOURCE.result_licenses rl on rl.id=r.id
|
left outer join SOURCE.result_licenses rl on rl.id=r.id
|
||||||
left outer join SOURCE.result_pids pids on pids.id=r.id
|
left outer join SOURCE.result_pids pids on pids.id=r.id
|
||||||
left outer join (
|
left outer join SOURCE.result_cc_licence rln on rln.id=r.id
|
||||||
select rl.id, sum(case when lower(rln.normalized) like 'cc-%' then 1 else 0 end) as count
|
left outer join SOURCE.result_projectcount rpc on rpc.id=r.id
|
||||||
from SOURCE.result_licenses rl
|
left outer join SOURCE.result_fundercount rfc on rfc.id=r.id
|
||||||
left outer join SOURCE.licenses_normalized rln on rl.type=rln.license
|
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
||||||
group by rl.id
|
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
||||||
) rln on rln.id=r.id
|
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, r.year, c.code, c.name;
|
||||||
group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, r.year, c.code, c.name;
|
|
||||||
|
|
||||||
create table TARGET.result_affiliated_datasource stored as parquet as
|
create table TARGET.result_affiliated_datasource stored as parquet as
|
||||||
select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
|
select
|
||||||
case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, r.peer_reviewed,
|
count(distinct r.id) as total,
|
||||||
coalesce(rln.count, 0) > 0 as cc_licence, r.abstract as abstract, r.type, d.name as dname
|
r.green,
|
||||||
|
r.gold,
|
||||||
|
case when rl.type is not null then true else false end as licence,
|
||||||
|
case when pids.pid is not null then true else false end as pid,
|
||||||
|
case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
|
||||||
|
r.peer_reviewed,
|
||||||
|
rln.cc_licence,
|
||||||
|
r.abstract as abstract,
|
||||||
|
r.authors > 1 as multiple_authors,
|
||||||
|
rpc.count > 1 as multiple_projects,
|
||||||
|
rfc.count > 1 as multiple_funders,
|
||||||
|
r.type,
|
||||||
|
d.name as dname
|
||||||
from SOURCE.result r
|
from SOURCE.result r
|
||||||
join SOURCE.result_organization ro on ro.id=r.id
|
join SOURCE.result_organization ro on ro.id=r.id
|
||||||
join SOURCE.organization o on o.id=ro.organization
|
join SOURCE.organization o on o.id=ro.organization
|
||||||
|
@ -64,18 +119,29 @@ left outer join SOURCE.result_datasources rd on rd.id=r.id
|
||||||
left outer join SOURCE.datasource d on d.id=rd.datasource
|
left outer join SOURCE.datasource d on d.id=rd.datasource
|
||||||
left outer join SOURCE.result_licenses rl on rl.id=r.id
|
left outer join SOURCE.result_licenses rl on rl.id=r.id
|
||||||
left outer join SOURCE.result_pids pids on pids.id=r.id
|
left outer join SOURCE.result_pids pids on pids.id=r.id
|
||||||
left outer join (
|
left outer join SOURCE.result_cc_licence rln on rln.id=r.id
|
||||||
select rl.id, sum(case when lower(rln.normalized) like 'cc-%' then 1 else 0 end) as count
|
left outer join SOURCE.result_projectcount rpc on rpc.id=r.id
|
||||||
from SOURCE.result_licenses rl
|
left outer join SOURCE.result_fundercount rfc on rfc.id=r.id
|
||||||
left outer join SOURCE.licenses_normalized rln on rl.type=rln.license
|
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
||||||
group by rl.id
|
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
||||||
) rln on rln.id=r.id
|
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, d.name;
|
||||||
group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, d.name;
|
|
||||||
|
|
||||||
create table TARGET.result_affiliated_datasource_country stored as parquet as
|
create table TARGET.result_affiliated_datasource_country stored as parquet as
|
||||||
select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
|
select
|
||||||
case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
|
count(distinct r.id) as total,
|
||||||
r.peer_reviewed, coalesce(rln.count, 0) > 0 as cc_licence, r.abstract as abstract, r.type, d.name as dname, c.code as ccode, c.name as cname
|
r.green,
|
||||||
|
r.gold,
|
||||||
|
case when rl.type is not null then true else false end as licence,
|
||||||
|
case when pids.pid is not null then true else false end as pid,
|
||||||
|
case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
|
||||||
|
r.peer_reviewed,
|
||||||
|
rln.cc_licence,
|
||||||
|
r.abstract as abstract,
|
||||||
|
r.authors > 1 as multiple_authors,
|
||||||
|
rpc.count > 1 as multiple_projects,
|
||||||
|
rfc.count > 1 as multiple_funders,
|
||||||
|
r.type,
|
||||||
|
d.name as dname, c.code as ccode, c.name as cname
|
||||||
from SOURCE.result r
|
from SOURCE.result r
|
||||||
join SOURCE.result_organization ro on ro.id=r.id
|
join SOURCE.result_organization ro on ro.id=r.id
|
||||||
join SOURCE.organization o on o.id=ro.organization
|
join SOURCE.organization o on o.id=ro.organization
|
||||||
|
@ -84,54 +150,87 @@ left outer join SOURCE.result_datasources rd on rd.id=r.id
|
||||||
left outer join SOURCE.datasource d on d.id=rd.datasource
|
left outer join SOURCE.datasource d on d.id=rd.datasource
|
||||||
left outer join SOURCE.result_licenses rl on rl.id=r.id
|
left outer join SOURCE.result_licenses rl on rl.id=r.id
|
||||||
left outer join SOURCE.result_pids pids on pids.id=r.id
|
left outer join SOURCE.result_pids pids on pids.id=r.id
|
||||||
left outer join (
|
left outer join SOURCE.result_cc_licence rln on rln.id=r.id
|
||||||
select rl.id, sum(case when lower(rln.normalized) like 'cc-%' then 1 else 0 end) as count
|
left outer join SOURCE.result_projectcount rpc on rpc.id=r.id
|
||||||
from SOURCE.result_licenses rl
|
left outer join SOURCE.result_fundercount rfc on rfc.id=r.id
|
||||||
left outer join SOURCE.licenses_normalized rln on rl.type=rln.license
|
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
||||||
group by rl.id
|
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
||||||
) rln on rln.id=r.id
|
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, d.name, c.code, c.name;
|
||||||
group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, d.name, c.code, c.name;
|
|
||||||
|
|
||||||
create table TARGET.result_affiliated_organization stored as parquet as
|
create table TARGET.result_affiliated_organization stored as parquet as
|
||||||
select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
|
select
|
||||||
case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
|
count(distinct r.id) as total,
|
||||||
r.peer_reviewed, coalesce(rln.count, 0) > 0 as cc_licence, r.abstract as abstract, r.type, o.name as oname
|
r.green,
|
||||||
|
r.gold,
|
||||||
|
case when rl.type is not null then true else false end as licence,
|
||||||
|
case when pids.pid is not null then true else false end as pid,
|
||||||
|
case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
|
||||||
|
r.peer_reviewed,
|
||||||
|
rln.cc_licence,
|
||||||
|
r.abstract as abstract,
|
||||||
|
r.authors > 1 as multiple_authors,
|
||||||
|
rpc.count > 1 as multiple_projects,
|
||||||
|
rfc.count > 1 as multiple_funders,
|
||||||
|
r.type,
|
||||||
|
o.name as oname
|
||||||
from SOURCE.result r
|
from SOURCE.result r
|
||||||
join SOURCE.result_organization ro on ro.id=r.id
|
join SOURCE.result_organization ro on ro.id=r.id
|
||||||
join SOURCE.organization o on o.id=ro.organization
|
join SOURCE.organization o on o.id=ro.organization
|
||||||
join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
|
join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
|
||||||
left outer join SOURCE.result_licenses rl on rl.id=r.id
|
left outer join SOURCE.result_licenses rl on rl.id=r.id
|
||||||
left outer join SOURCE.result_pids pids on pids.id=r.id
|
left outer join SOURCE.result_pids pids on pids.id=r.id
|
||||||
left outer join (
|
left outer join SOURCE.result_cc_licence rln on rln.id=r.id
|
||||||
select rl.id, sum(case when lower(rln.normalized) like 'cc-%' then 1 else 0 end) as count
|
left outer join SOURCE.result_projectcount rpc on rpc.id=r.id
|
||||||
from SOURCE.result_licenses rl
|
left outer join SOURCE.result_fundercount rfc on rfc.id=r.id
|
||||||
left outer join SOURCE.licenses_normalized rln on rl.type=rln.license
|
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
||||||
group by rl.id
|
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
||||||
) rln on rln.id=r.id
|
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, o.name;
|
||||||
group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, o.name;
|
|
||||||
|
|
||||||
create table TARGET.result_affiliated_organization_country stored as parquet as
|
create table TARGET.result_affiliated_organization_country stored as parquet as
|
||||||
select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
|
select
|
||||||
case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
|
count(distinct r.id) as total,
|
||||||
r.peer_reviewed, coalesce(rln.count, 0) > 0 as cc_licence, r.abstract as abstract, r.type, o.name as oname, c.code as ccode, c.name as cname
|
r.green,
|
||||||
|
r.gold,
|
||||||
|
case when rl.type is not null then true else false end as licence,
|
||||||
|
case when pids.pid is not null then true else false end as pid,
|
||||||
|
case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
|
||||||
|
r.peer_reviewed,
|
||||||
|
rln.cc_licence,
|
||||||
|
r.abstract as abstract,
|
||||||
|
r.authors > 1 as multiple_authors,
|
||||||
|
rpc.count > 1 as multiple_projects,
|
||||||
|
rfc.count > 1 as multiple_funders,
|
||||||
|
r.type,
|
||||||
|
o.name as oname, c.code as ccode, c.name as cname
|
||||||
from SOURCE.result r
|
from SOURCE.result r
|
||||||
join SOURCE.result_organization ro on ro.id=r.id
|
join SOURCE.result_organization ro on ro.id=r.id
|
||||||
join SOURCE.organization o on o.id=ro.organization
|
join SOURCE.organization o on o.id=ro.organization
|
||||||
join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
|
join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
|
||||||
left outer join SOURCE.result_licenses rl on rl.id=r.id
|
left outer join SOURCE.result_licenses rl on rl.id=r.id
|
||||||
left outer join SOURCE.result_pids pids on pids.id=r.id
|
left outer join SOURCE.result_pids pids on pids.id=r.id
|
||||||
left outer join (
|
left outer join SOURCE.result_cc_licence rln on rln.id=r.id
|
||||||
select rl.id, sum(case when lower(rln.normalized) like 'cc-%' then 1 else 0 end) as count
|
left outer join SOURCE.result_projectcount rpc on rpc.id=r.id
|
||||||
from SOURCE.result_licenses rl
|
left outer join SOURCE.result_fundercount rfc on rfc.id=r.id
|
||||||
left outer join SOURCE.licenses_normalized rln on rl.type=rln.license
|
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
||||||
group by rl.id
|
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
||||||
) rln on rln.id=r.id
|
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, o.name, c.code, c.name;
|
||||||
group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, o.name, c.code, c.name;
|
|
||||||
|
|
||||||
create table TARGET.result_affiliated_funder stored as parquet as
|
create table TARGET.result_affiliated_funder stored as parquet as
|
||||||
select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
|
select
|
||||||
case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, r.peer_reviewed,
|
count(distinct r.id) as total,
|
||||||
coalesce(rln.count, 0) > 0 as cc_licence, r.abstract as abstract, r.type, p.funder as pfunder
|
r.green,
|
||||||
|
r.gold,
|
||||||
|
case when rl.type is not null then true else false end as licence,
|
||||||
|
case when pids.pid is not null then true else false end as pid,
|
||||||
|
case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
|
||||||
|
r.peer_reviewed,
|
||||||
|
rln.cc_licence,
|
||||||
|
r.abstract as abstract,
|
||||||
|
r.authors > 1 as multiple_authors,
|
||||||
|
rpc.count > 1 as multiple_projects,
|
||||||
|
rfc.count > 1 as multiple_funders,
|
||||||
|
r.type,
|
||||||
|
p.funder as pfunder
|
||||||
from SOURCE.result r
|
from SOURCE.result r
|
||||||
join SOURCE.result_organization ro on ro.id=r.id
|
join SOURCE.result_organization ro on ro.id=r.id
|
||||||
join SOURCE.organization o on o.id=ro.organization
|
join SOURCE.organization o on o.id=ro.organization
|
||||||
|
@ -140,18 +239,29 @@ join SOURCE.result_projects rp on rp.id=r.id
|
||||||
join SOURCE.project p on p.id=rp.project
|
join SOURCE.project p on p.id=rp.project
|
||||||
left outer join SOURCE.result_licenses rl on rl.id=r.id
|
left outer join SOURCE.result_licenses rl on rl.id=r.id
|
||||||
left outer join SOURCE.result_pids pids on pids.id=r.id
|
left outer join SOURCE.result_pids pids on pids.id=r.id
|
||||||
left outer join (
|
left outer join SOURCE.result_cc_licence rln on rln.id=r.id
|
||||||
select rl.id, sum(case when lower(rln.normalized) like 'cc-%' then 1 else 0 end) as count
|
left outer join SOURCE.result_projectcount rpc on rpc.id=r.id
|
||||||
from SOURCE.result_licenses rl
|
left outer join SOURCE.result_fundercount rfc on rfc.id=r.id
|
||||||
left outer join SOURCE.licenses_normalized rln on rl.type=rln.license
|
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
||||||
group by rl.id
|
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
||||||
) rln on rln.id=r.id
|
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, p.funder;
|
||||||
group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, p.funder;
|
|
||||||
|
|
||||||
create table TARGET.result_affiliated_funder_country stored as parquet as
|
create table TARGET.result_affiliated_funder_country stored as parquet as
|
||||||
select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
|
select
|
||||||
case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
|
count(distinct r.id) as total,
|
||||||
r.peer_reviewed, coalesce(rln.count, 0) > 0 as cc_licence, r.abstract as abstract, r.type, p.funder as pfunder, c.code as ccode, c.name as cname
|
r.green,
|
||||||
|
r.gold,
|
||||||
|
case when rl.type is not null then true else false end as licence,
|
||||||
|
case when pids.pid is not null then true else false end as pid,
|
||||||
|
case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
|
||||||
|
r.peer_reviewed,
|
||||||
|
rln.cc_licence,
|
||||||
|
r.abstract as abstract,
|
||||||
|
r.authors > 1 as multiple_authors,
|
||||||
|
rpc.count > 1 as multiple_projects,
|
||||||
|
rfc.count > 1 as multiple_funders,
|
||||||
|
r.type,
|
||||||
|
p.funder as pfunder, c.code as ccode, c.name as cname
|
||||||
from SOURCE.result r
|
from SOURCE.result r
|
||||||
join SOURCE.result_organization ro on ro.id=r.id
|
join SOURCE.result_organization ro on ro.id=r.id
|
||||||
join SOURCE.organization o on o.id=ro.organization
|
join SOURCE.organization o on o.id=ro.organization
|
||||||
|
@ -160,18 +270,29 @@ join SOURCE.result_projects rp on rp.id=r.id
|
||||||
join SOURCE.project p on p.id=rp.project
|
join SOURCE.project p on p.id=rp.project
|
||||||
left outer join SOURCE.result_licenses rl on rl.id=r.id
|
left outer join SOURCE.result_licenses rl on rl.id=r.id
|
||||||
left outer join SOURCE.result_pids pids on pids.id=r.id
|
left outer join SOURCE.result_pids pids on pids.id=r.id
|
||||||
left outer join (
|
left outer join SOURCE.result_cc_licence rln on rln.id=r.id
|
||||||
select rl.id, sum(case when lower(rln.normalized) like 'cc-%' then 1 else 0 end) as count
|
left outer join SOURCE.result_projectcount rpc on rpc.id=r.id
|
||||||
from SOURCE.result_licenses rl
|
left outer join SOURCE.result_fundercount rfc on rfc.id=r.id
|
||||||
left outer join SOURCE.licenses_normalized rln on rl.type=rln.license
|
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
||||||
group by rl.id
|
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
||||||
) rln on rln.id=r.id
|
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, p.funder, c.code, c.name;
|
||||||
group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, p.funder, c.code, c.name;
|
|
||||||
|
|
||||||
create table TARGET.result_deposited_country stored as parquet as
|
create table TARGET.result_deposited_country stored as parquet as
|
||||||
select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
|
select
|
||||||
case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
|
count(distinct r.id) as total,
|
||||||
r.peer_reviewed, coalesce(rln.count, 0) > 0 as cc_licence, r.abstract as abstract, r.type, c.code as ccode, c.name as cname
|
r.green,
|
||||||
|
r.gold,
|
||||||
|
case when rl.type is not null then true else false end as licence,
|
||||||
|
case when pids.pid is not null then true else false end as pid,
|
||||||
|
case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
|
||||||
|
r.peer_reviewed,
|
||||||
|
rln.cc_licence,
|
||||||
|
r.abstract as abstract,
|
||||||
|
r.authors > 1 as multiple_authors,
|
||||||
|
rpc.count > 1 as multiple_projects,
|
||||||
|
rfc.count > 1 as multiple_funders,
|
||||||
|
r.type,
|
||||||
|
c.code as ccode, c.name as cname
|
||||||
from SOURCE.result r
|
from SOURCE.result r
|
||||||
join SOURCE.result_datasources rd on rd.id=r.id
|
join SOURCE.result_datasources rd on rd.id=r.id
|
||||||
join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
|
join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
|
||||||
|
@ -180,18 +301,29 @@ join SOURCE.organization o on o.id=dor.organization
|
||||||
join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
|
join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
|
||||||
left outer join SOURCE.result_licenses rl on rl.id=r.id
|
left outer join SOURCE.result_licenses rl on rl.id=r.id
|
||||||
left outer join SOURCE.result_pids pids on pids.id=r.id
|
left outer join SOURCE.result_pids pids on pids.id=r.id
|
||||||
left outer join (
|
left outer join SOURCE.result_cc_licence rln on rln.id=r.id
|
||||||
select rl.id, sum(case when lower(rln.normalized) like 'cc-%' then 1 else 0 end) as count
|
left outer join SOURCE.result_projectcount rpc on rpc.id=r.id
|
||||||
from SOURCE.result_licenses rl
|
left outer join SOURCE.result_fundercount rfc on rfc.id=r.id
|
||||||
left outer join SOURCE.licenses_normalized rln on rl.type=rln.license
|
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
||||||
group by rl.id
|
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
||||||
) rln on rln.id=r.id
|
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, c.code, c.name;
|
||||||
group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, c.code, c.name;
|
|
||||||
|
|
||||||
create table TARGET.result_deposited_year stored as parquet as
|
create table TARGET.result_deposited_year stored as parquet as
|
||||||
select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
|
select
|
||||||
case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, r.peer_reviewed,
|
count(distinct r.id) as total,
|
||||||
coalesce(rln.count, 0) > 0 as cc_licence, r.abstract as abstract, r.type, r.year
|
r.green,
|
||||||
|
r.gold,
|
||||||
|
case when rl.type is not null then true else false end as licence,
|
||||||
|
case when pids.pid is not null then true else false end as pid,
|
||||||
|
case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
|
||||||
|
r.peer_reviewed,
|
||||||
|
rln.cc_licence,
|
||||||
|
r.abstract as abstract,
|
||||||
|
r.authors > 1 as multiple_authors,
|
||||||
|
rpc.count > 1 as multiple_projects,
|
||||||
|
rfc.count > 1 as multiple_funders,
|
||||||
|
r.type,
|
||||||
|
r.year
|
||||||
from SOURCE.result r
|
from SOURCE.result r
|
||||||
join SOURCE.result_datasources rd on rd.id=r.id
|
join SOURCE.result_datasources rd on rd.id=r.id
|
||||||
join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
|
join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
|
||||||
|
@ -200,18 +332,29 @@ join SOURCE.organization o on o.id=dor.organization
|
||||||
join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
|
join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
|
||||||
left outer join SOURCE.result_licenses rl on rl.id=r.id
|
left outer join SOURCE.result_licenses rl on rl.id=r.id
|
||||||
left outer join SOURCE.result_pids pids on pids.id=r.id
|
left outer join SOURCE.result_pids pids on pids.id=r.id
|
||||||
left outer join (
|
left outer join SOURCE.result_cc_licence rln on rln.id=r.id
|
||||||
select rl.id, sum(case when lower(rln.normalized) like 'cc-%' then 1 else 0 end) as count
|
left outer join SOURCE.result_projectcount rpc on rpc.id=r.id
|
||||||
from SOURCE.result_licenses rl
|
left outer join SOURCE.result_fundercount rfc on rfc.id=r.id
|
||||||
left outer join SOURCE.licenses_normalized rln on rl.type=rln.license
|
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
||||||
group by rl.id
|
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
||||||
) rln on rln.id=r.id
|
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, r.year;
|
||||||
group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, r.year;
|
|
||||||
|
|
||||||
create table TARGET.result_deposited_year_country stored as parquet as
|
create table TARGET.result_deposited_year_country stored as parquet as
|
||||||
select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
|
select
|
||||||
case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
|
count(distinct r.id) as total,
|
||||||
r.peer_reviewed, coalesce(rln.count, 0) > 0 as cc_licence, r.abstract as abstract, r.type, r.year, c.code as ccode, c.name as cname
|
r.green,
|
||||||
|
r.gold,
|
||||||
|
case when rl.type is not null then true else false end as licence,
|
||||||
|
case when pids.pid is not null then true else false end as pid,
|
||||||
|
case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
|
||||||
|
r.peer_reviewed,
|
||||||
|
rln.cc_licence,
|
||||||
|
r.abstract as abstract,
|
||||||
|
r.authors > 1 as multiple_authors,
|
||||||
|
rpc.count > 1 as multiple_projects,
|
||||||
|
rfc.count > 1 as multiple_funders,
|
||||||
|
r.type,
|
||||||
|
r.year, c.code as ccode, c.name as cname
|
||||||
from SOURCE.result r
|
from SOURCE.result r
|
||||||
join SOURCE.result_datasources rd on rd.id=r.id
|
join SOURCE.result_datasources rd on rd.id=r.id
|
||||||
join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
|
join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
|
||||||
|
@ -220,18 +363,29 @@ join SOURCE.organization o on o.id=dor.organization
|
||||||
join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
|
join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
|
||||||
left outer join SOURCE.result_licenses rl on rl.id=r.id
|
left outer join SOURCE.result_licenses rl on rl.id=r.id
|
||||||
left outer join SOURCE.result_pids pids on pids.id=r.id
|
left outer join SOURCE.result_pids pids on pids.id=r.id
|
||||||
left outer join (
|
left outer join SOURCE.result_cc_licence rln on rln.id=r.id
|
||||||
select rl.id, sum(case when lower(rln.normalized) like 'cc-%' then 1 else 0 end) as count
|
left outer join SOURCE.result_projectcount rpc on rpc.id=r.id
|
||||||
from SOURCE.result_licenses rl
|
left outer join SOURCE.result_fundercount rfc on rfc.id=r.id
|
||||||
left outer join SOURCE.licenses_normalized rln on rl.type=rln.license
|
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
||||||
group by rl.id
|
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
||||||
) rln on rln.id=r.id
|
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, r.year, c.code, c.name;
|
||||||
group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, r.year, c.code, c.name;
|
|
||||||
|
|
||||||
create table TARGET.result_deposited_datasource stored as parquet as
|
create table TARGET.result_deposited_datasource stored as parquet as
|
||||||
select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
|
select
|
||||||
case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
|
count(distinct r.id) as total,
|
||||||
r.peer_reviewed, coalesce(rln.count, 0) > 0 as cc_licence, r.abstract as abstract, r.type, d.name as dname
|
r.green,
|
||||||
|
r.gold,
|
||||||
|
case when rl.type is not null then true else false end as licence,
|
||||||
|
case when pids.pid is not null then true else false end as pid,
|
||||||
|
case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
|
||||||
|
r.peer_reviewed,
|
||||||
|
rln.cc_licence,
|
||||||
|
r.abstract as abstract,
|
||||||
|
r.authors > 1 as multiple_authors,
|
||||||
|
rpc.count > 1 as multiple_projects,
|
||||||
|
rfc.count > 1 as multiple_funders,
|
||||||
|
r.type,
|
||||||
|
d.name as dname
|
||||||
from SOURCE.result r
|
from SOURCE.result r
|
||||||
join SOURCE.result_datasources rd on rd.id=r.id
|
join SOURCE.result_datasources rd on rd.id=r.id
|
||||||
join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
|
join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
|
||||||
|
@ -240,18 +394,29 @@ join SOURCE.organization o on o.id=dor.organization
|
||||||
join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
|
join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
|
||||||
left outer join SOURCE.result_licenses rl on rl.id=r.id
|
left outer join SOURCE.result_licenses rl on rl.id=r.id
|
||||||
left outer join SOURCE.result_pids pids on pids.id=r.id
|
left outer join SOURCE.result_pids pids on pids.id=r.id
|
||||||
left outer join (
|
left outer join SOURCE.result_cc_licence rln on rln.id=r.id
|
||||||
select rl.id, sum(case when lower(rln.normalized) like 'cc-%' then 1 else 0 end) as count
|
left outer join SOURCE.result_projectcount rpc on rpc.id=r.id
|
||||||
from SOURCE.result_licenses rl
|
left outer join SOURCE.result_fundercount rfc on rfc.id=r.id
|
||||||
left outer join SOURCE.licenses_normalized rln on rl.type=rln.license
|
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
||||||
group by rl.id
|
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
||||||
) rln on rln.id=r.id
|
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, d.name;
|
||||||
group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, d.name;
|
|
||||||
|
|
||||||
create table TARGET.result_deposited_datasource_country stored as parquet as
|
create table TARGET.result_deposited_datasource_country stored as parquet as
|
||||||
select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
|
select
|
||||||
case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
|
count(distinct r.id) as total,
|
||||||
r.peer_reviewed, coalesce(rln.count, 0) > 0 as cc_licence, r.abstract as abstract, r.type, d.name as dname, c.code as ccode, c.name as cname
|
r.green,
|
||||||
|
r.gold,
|
||||||
|
case when rl.type is not null then true else false end as licence,
|
||||||
|
case when pids.pid is not null then true else false end as pid,
|
||||||
|
case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
|
||||||
|
r.peer_reviewed,
|
||||||
|
rln.cc_licence,
|
||||||
|
r.abstract as abstract,
|
||||||
|
r.authors > 1 as multiple_authors,
|
||||||
|
rpc.count > 1 as multiple_projects,
|
||||||
|
rfc.count > 1 as multiple_funders,
|
||||||
|
r.type,
|
||||||
|
d.name as dname, c.code as ccode, c.name as cname
|
||||||
from SOURCE.result r
|
from SOURCE.result r
|
||||||
join SOURCE.result_datasources rd on rd.id=r.id
|
join SOURCE.result_datasources rd on rd.id=r.id
|
||||||
join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
|
join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
|
||||||
|
@ -260,18 +425,29 @@ join SOURCE.organization o on o.id=dor.organization
|
||||||
join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
|
join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
|
||||||
left outer join SOURCE.result_licenses rl on rl.id=r.id
|
left outer join SOURCE.result_licenses rl on rl.id=r.id
|
||||||
left outer join SOURCE.result_pids pids on pids.id=r.id
|
left outer join SOURCE.result_pids pids on pids.id=r.id
|
||||||
left outer join (
|
left outer join SOURCE.result_cc_licence rln on rln.id=r.id
|
||||||
select rl.id, sum(case when lower(rln.normalized) like 'cc-%' then 1 else 0 end) as count
|
left outer join SOURCE.result_projectcount rpc on rpc.id=r.id
|
||||||
from SOURCE.result_licenses rl
|
left outer join SOURCE.result_fundercount rfc on rfc.id=r.id
|
||||||
left outer join SOURCE.licenses_normalized rln on rl.type=rln.license
|
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
||||||
group by rl.id
|
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
||||||
) rln on rln.id=r.id
|
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, d.name, c.code, c.name;
|
||||||
group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, d.name, c.code, c.name;
|
|
||||||
|
|
||||||
create table TARGET.result_deposited_organization stored as parquet as
|
create table TARGET.result_deposited_organization stored as parquet as
|
||||||
select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
|
select
|
||||||
case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, r.peer_reviewed,
|
count(distinct r.id) as total,
|
||||||
coalesce(rln.count, 0) > 0 as cc_licence, r.abstract as abstract, r.type, o.name as oname
|
r.green,
|
||||||
|
r.gold,
|
||||||
|
case when rl.type is not null then true else false end as licence,
|
||||||
|
case when pids.pid is not null then true else false end as pid,
|
||||||
|
case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
|
||||||
|
r.peer_reviewed,
|
||||||
|
rln.cc_licence,
|
||||||
|
r.abstract as abstract,
|
||||||
|
r.authors > 1 as multiple_authors,
|
||||||
|
rpc.count > 1 as multiple_projects,
|
||||||
|
rfc.count > 1 as multiple_funders,
|
||||||
|
r.type,
|
||||||
|
o.name as oname
|
||||||
from SOURCE.result r
|
from SOURCE.result r
|
||||||
join SOURCE.result_datasources rd on rd.id=r.id
|
join SOURCE.result_datasources rd on rd.id=r.id
|
||||||
join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
|
join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
|
||||||
|
@ -280,18 +456,29 @@ join SOURCE.organization o on o.id=dor.organization
|
||||||
join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
|
join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
|
||||||
left outer join SOURCE.result_licenses rl on rl.id=r.id
|
left outer join SOURCE.result_licenses rl on rl.id=r.id
|
||||||
left outer join SOURCE.result_pids pids on pids.id=r.id
|
left outer join SOURCE.result_pids pids on pids.id=r.id
|
||||||
left outer join (
|
left outer join SOURCE.result_cc_licence rln on rln.id=r.id
|
||||||
select rl.id, sum(case when lower(rln.normalized) like 'cc-%' then 1 else 0 end) as count
|
left outer join SOURCE.result_projectcount rpc on rpc.id=r.id
|
||||||
from SOURCE.result_licenses rl
|
left outer join SOURCE.result_fundercount rfc on rfc.id=r.id
|
||||||
left outer join SOURCE.licenses_normalized rln on rl.type=rln.license
|
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
||||||
group by rl.id
|
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
||||||
) rln on rln.id=r.id
|
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, o.name;
|
||||||
group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, o.name;
|
|
||||||
|
|
||||||
create table TARGET.result_deposited_organization_country stored as parquet as
|
create table TARGET.result_deposited_organization_country stored as parquet as
|
||||||
select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
|
select
|
||||||
case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
|
count(distinct r.id) as total,
|
||||||
r.peer_reviewed, coalesce(rln.count, 0) > 0 as cc_licence, r.abstract as abstract, r.type, o.name as oname, c.code as ccode, c.name as cname
|
r.green,
|
||||||
|
r.gold,
|
||||||
|
case when rl.type is not null then true else false end as licence,
|
||||||
|
case when pids.pid is not null then true else false end as pid,
|
||||||
|
case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
|
||||||
|
r.peer_reviewed,
|
||||||
|
rln.cc_licence,
|
||||||
|
r.abstract as abstract,
|
||||||
|
r.authors > 1 as multiple_authors,
|
||||||
|
rpc.count > 1 as multiple_projects,
|
||||||
|
rfc.count > 1 as multiple_funders,
|
||||||
|
r.type,
|
||||||
|
o.name as oname, c.code as ccode, c.name as cname
|
||||||
from SOURCE.result r
|
from SOURCE.result r
|
||||||
join SOURCE.result_datasources rd on rd.id=r.id
|
join SOURCE.result_datasources rd on rd.id=r.id
|
||||||
join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
|
join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
|
||||||
|
@ -300,18 +487,29 @@ join SOURCE.organization o on o.id=dor.organization
|
||||||
join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
|
join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
|
||||||
left outer join SOURCE.result_licenses rl on rl.id=r.id
|
left outer join SOURCE.result_licenses rl on rl.id=r.id
|
||||||
left outer join SOURCE.result_pids pids on pids.id=r.id
|
left outer join SOURCE.result_pids pids on pids.id=r.id
|
||||||
left outer join (
|
left outer join SOURCE.result_cc_licence rln on rln.id=r.id
|
||||||
select rl.id, sum(case when lower(rln.normalized) like 'cc-%' then 1 else 0 end) as count
|
left outer join SOURCE.result_projectcount rpc on rpc.id=r.id
|
||||||
from SOURCE.result_licenses rl
|
left outer join SOURCE.result_fundercount rfc on rfc.id=r.id
|
||||||
left outer join SOURCE.licenses_normalized rln on rl.type=rln.license
|
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
||||||
group by rl.id
|
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
||||||
) rln on rln.id=r.id
|
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, o.name, c.code, c.name;
|
||||||
group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, o.name, c.code, c.name;
|
|
||||||
|
|
||||||
create table TARGET.result_deposited_funder stored as parquet as
|
create table TARGET.result_deposited_funder stored as parquet as
|
||||||
select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
|
select
|
||||||
case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
|
count(distinct r.id) as total,
|
||||||
r.peer_reviewed, coalesce(rln.count, 0) > 0 as cc_licence, r.abstract as abstract, r.type, p.funder as pfunder
|
r.green,
|
||||||
|
r.gold,
|
||||||
|
case when rl.type is not null then true else false end as licence,
|
||||||
|
case when pids.pid is not null then true else false end as pid,
|
||||||
|
case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
|
||||||
|
r.peer_reviewed,
|
||||||
|
rln.cc_licence,
|
||||||
|
r.abstract as abstract,
|
||||||
|
r.authors > 1 as multiple_authors,
|
||||||
|
rpc.count > 1 as multiple_projects,
|
||||||
|
rfc.count > 1 as multiple_funders,
|
||||||
|
r.type,
|
||||||
|
p.funder as pfunder
|
||||||
from SOURCE.result r
|
from SOURCE.result r
|
||||||
join SOURCE.result_datasources rd on rd.id=r.id
|
join SOURCE.result_datasources rd on rd.id=r.id
|
||||||
join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
|
join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
|
||||||
|
@ -322,18 +520,29 @@ join SOURCE.result_projects rp on rp.id=r.id
|
||||||
join SOURCE.project p on p.id=rp.project
|
join SOURCE.project p on p.id=rp.project
|
||||||
left outer join SOURCE.result_licenses rl on rl.id=r.id
|
left outer join SOURCE.result_licenses rl on rl.id=r.id
|
||||||
left outer join SOURCE.result_pids pids on pids.id=r.id
|
left outer join SOURCE.result_pids pids on pids.id=r.id
|
||||||
left outer join (
|
left outer join SOURCE.result_cc_licence rln on rln.id=r.id
|
||||||
select rl.id, sum(case when lower(rln.normalized) like 'cc-%' then 1 else 0 end) as count
|
left outer join SOURCE.result_projectcount rpc on rpc.id=r.id
|
||||||
from SOURCE.result_licenses rl
|
left outer join SOURCE.result_fundercount rfc on rfc.id=r.id
|
||||||
left outer join SOURCE.licenses_normalized rln on rl.type=rln.license
|
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
||||||
group by rl.id
|
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
||||||
) rln on rln.id=r.id
|
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, p.funder;
|
||||||
group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, p.funder;
|
|
||||||
|
|
||||||
create table TARGET.result_deposited_funder_country stored as parquet as
|
create table TARGET.result_deposited_funder_country stored as parquet as
|
||||||
select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
|
select
|
||||||
case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
|
count(distinct r.id) as total,
|
||||||
r.peer_reviewed, coalesce(rln.count, 0) > 0 as cc_licence, r.abstract as abstract, r.type, p.funder as pfunder, c.code as ccode, c.name as cname
|
r.green,
|
||||||
|
r.gold,
|
||||||
|
case when rl.type is not null then true else false end as licence,
|
||||||
|
case when pids.pid is not null then true else false end as pid,
|
||||||
|
case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
|
||||||
|
r.peer_reviewed,
|
||||||
|
rln.cc_licence,
|
||||||
|
r.abstract as abstract,
|
||||||
|
r.authors > 1 as multiple_authors,
|
||||||
|
rpc.count > 1 as multiple_projects,
|
||||||
|
rfc.count > 1 as multiple_funders,
|
||||||
|
r.type,
|
||||||
|
p.funder as pfunder, c.code as ccode, c.name as cname
|
||||||
from SOURCE.result r
|
from SOURCE.result r
|
||||||
join SOURCE.result_datasources rd on rd.id=r.id
|
join SOURCE.result_datasources rd on rd.id=r.id
|
||||||
join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
|
join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
|
||||||
|
@ -344,29 +553,28 @@ join SOURCE.result_projects rp on rp.id=r.id
|
||||||
join SOURCE.project p on p.id=rp.project
|
join SOURCE.project p on p.id=rp.project
|
||||||
left outer join SOURCE.result_licenses rl on rl.id=r.id
|
left outer join SOURCE.result_licenses rl on rl.id=r.id
|
||||||
left outer join SOURCE.result_pids pids on pids.id=r.id
|
left outer join SOURCE.result_pids pids on pids.id=r.id
|
||||||
left outer join (
|
left outer join SOURCE.result_cc_licence rln on rln.id=r.id
|
||||||
select rl.id, sum(case when lower(rln.normalized) like 'cc-%' then 1 else 0 end) as count
|
left outer join SOURCE.result_projectcount rpc on rpc.id=r.id
|
||||||
from SOURCE.result_licenses rl
|
left outer join SOURCE.result_fundercount rfc on rfc.id=r.id
|
||||||
left outer join SOURCE.licenses_normalized rln on rl.type=rln.license
|
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
||||||
group by rl.id
|
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
||||||
) rln on rln.id=r.id
|
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, p.funder, c.code, c.name;
|
||||||
group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, p.funder, c.code, c.name;
|
|
||||||
|
|
||||||
compute stats TARGET.result_affiliated_country;
|
-- compute stats TARGET.result_affiliated_country;
|
||||||
compute stats TARGET.result_affiliated_year;
|
-- compute stats TARGET.result_affiliated_year;
|
||||||
compute stats TARGET.result_affiliated_year_country;
|
-- compute stats TARGET.result_affiliated_year_country;
|
||||||
compute stats TARGET.result_affiliated_datasource;
|
-- compute stats TARGET.result_affiliated_datasource;
|
||||||
compute stats TARGET.result_affiliated_datasource_country;
|
-- compute stats TARGET.result_affiliated_datasource_country;
|
||||||
compute stats TARGET.result_affiliated_organization;
|
-- compute stats TARGET.result_affiliated_organization;
|
||||||
compute stats TARGET.result_affiliated_organization_country;
|
-- compute stats TARGET.result_affiliated_organization_country;
|
||||||
compute stats TARGET.result_affiliated_funder;
|
-- compute stats TARGET.result_affiliated_funder;
|
||||||
compute stats TARGET.result_affiliated_funder_country;
|
-- compute stats TARGET.result_affiliated_funder_country;
|
||||||
compute stats TARGET.result_deposited_country;
|
-- compute stats TARGET.result_deposited_country;
|
||||||
compute stats TARGET.result_deposited_year;
|
-- compute stats TARGET.result_deposited_year;
|
||||||
compute stats TARGET.result_deposited_year_country;
|
-- compute stats TARGET.result_deposited_year_country;
|
||||||
compute stats TARGET.result_deposited_datasource;
|
-- compute stats TARGET.result_deposited_datasource;
|
||||||
compute stats TARGET.result_deposited_datasource_country;
|
-- compute stats TARGET.result_deposited_datasource_country;
|
||||||
compute stats TARGET.result_deposited_organization;
|
-- compute stats TARGET.result_deposited_organization;
|
||||||
compute stats TARGET.result_deposited_organization_country;
|
-- compute stats TARGET.result_deposited_organization_country;
|
||||||
compute stats TARGET.result_deposited_funder;
|
-- compute stats TARGET.result_deposited_funder;
|
||||||
compute stats TARGET.result_deposited_funder_country;
|
-- compute stats TARGET.result_deposited_funder_country;
|
Loading…
Reference in New Issue