forked from D-Net/dnet-hadoop
Merge pull request '[stats] updates in the mapping, indicators, wf' (#145) from antonis.lempesis/dnet-hadoop:beta into beta
Reviewed-on: D-Net/dnet-hadoop#145 (beta)
commit
a53acfbc06
@ -0,0 +1,16 @@
|
||||
# Point the /tmp egg-cache location used by impala-shell at a folder under the
# user's home directory. The link is (re)created whenever the path is missing
# or is not already a symlink.
export PYTHON_EGG_CACHE="/home/$(whoami)/.python-eggs"
export link_folder="/tmp/impala-shell-python-egg-cache-$(whoami)"
if ! [ -L "$link_folder" ]
then
    rm -Rf "$link_folder"
    ln -sfn "${PYTHON_EGG_CACHE}${link_folder}" "${link_folder}"
fi

# Positional arguments: $1 = database to read from, $2 = database to (re)create,
# $3 = exported as SHADOW (not used further in this script).
export SOURCE=$1
export TARGET=$2
export SHADOW=$3

# Refuse to run the destructive "drop database ... cascade" below with an empty name.
if [ -z "$SOURCE" ] || [ -z "$TARGET" ]
then
    echo "Usage: $0 <source_db> <target_db> [shadow_db]" >&2
    exit 1
fi

echo "Creating observatory database"
impala-shell -q "drop database if exists ${TARGET} cascade"
impala-shell -q "create database if not exists ${TARGET}"
# Turn every table name listed for the first database into a same-named
# "select *" view in the second database and feed the statements to impala-shell.
impala-shell -d "${SOURCE}" -q "show tables" --delimited | sed "s/\(.*\)/create view ${TARGET}.\1 as select * from ${SOURCE}.\1;/" | impala-shell -f -
|
@ -1,62 +0,0 @@
|
||||
----------------------------------------------------
|
||||
-- Shortcuts for various definitions in stats db ---
|
||||
----------------------------------------------------
|
||||
|
||||
-- Peer reviewed:
-- Results that have been collected from Crossref
-- The CTE collects the ids of results having a source datasource named 'Crossref';
-- those ids are flagged true, every other result id is flagged false (anti-join).
create table ${stats_db_name}.result_peerreviewed as
with peer_reviewed as (
    select distinct r.id as id
    from ${stats_db_name}.result r
    inner join ${stats_db_name}.result_sources rs on rs.id = r.id
    inner join ${stats_db_name}.datasource d on d.id = rs.datasource
    where d.name = 'Crossref'
)
select distinct
    peer_reviewed.id as id,
    true as peer_reviewed
from peer_reviewed
union all
select distinct
    r.id as id,
    false as peer_reviewed
from ${stats_db_name}.result r
left join peer_reviewed pr on pr.id = r.id
where pr.id is null;
|
||||
|
||||
-- Green OA:
-- OA results that are hosted by an Institutional repository and have NOT been harvested from a DOAJ journal.
-- The DOAJ datasource name is compared case-insensitively so that the exclusion
-- does not silently stop matching because of the casing stored in datasource.name.
-- Results not selected by the CTE are emitted with green = false (anti-join).
create table ${stats_db_name}.result_greenoa as
with result_green as (
    select distinct r.id as id
    from ${stats_db_name}.result r
    join ${stats_db_name}.result_datasources rd on rd.id=r.id
    join ${stats_db_name}.datasource d on d.id=rd.datasource
    left outer join (
        select rd.id from ${stats_db_name}.result_datasources rd
        join ${stats_db_name}.datasource d on rd.datasource=d.id
        join ${stats_db_name}.datasource_sources sds on sds.id=d.id
        join ${stats_db_name}.datasource sd on sd.id=sds.datasource
        where upper(sd.name)='DOAJ-ARTICLES'
    ) as doaj on doaj.id=r.id
    where r.bestlicence in ('Open Access', 'Open Source') and d.type='Institutional Repository' and doaj.id is null)
select distinct result_green.id, true as green
from result_green
union all
select distinct r.id as id, false as green
from ${stats_db_name}.result r
left outer join result_green rg on rg.id=r.id
where rg.id is null;
|
||||
|
||||
-- GOLD OA:
-- OA results that have been harvested from a DOAJ journal.
-- The DOAJ datasource name is compared case-insensitively so the match does not
-- depend on the casing stored in datasource.name.
-- Non-gold results are found with a left-join anti-join rather than NOT IN, which
-- is NULL-safe and matches the pattern used by the other shortcut tables.
create table ${stats_db_name}.result_gold as
with result_gold as (
    select distinct r.id as id
    from ${stats_db_name}.result r
    join ${stats_db_name}.result_datasources rd on rd.id=r.id
    join ${stats_db_name}.datasource d on d.id=rd.datasource
    join ${stats_db_name}.datasource_sources sds on sds.id=d.id
    join ${stats_db_name}.datasource sd on sd.id=sds.datasource
    where r.type='publication' and r.bestlicence='Open Access' and upper(sd.name)='DOAJ-ARTICLES')
select distinct result_gold.id, true as gold
from result_gold
union all
select distinct r.id, false as gold
from ${stats_db_name}.result r
left outer join result_gold rg on rg.id=r.id
where rg.id is null;
|
@ -0,0 +1,22 @@
|
||||
----------------------------------------------------
|
||||
-- Shortcuts for various definitions in stats db ---
|
||||
----------------------------------------------------
|
||||
|
||||
-- Peer reviewed:
-- A result is flagged true only when its indicator rows report
-- doi_from_crossref = 1 and grey_lit = 0; a result lacking a row in either
-- indicator table falls through to false.
create table ${stats_db_name}.result_peerreviewed as
select
    r.id as id,
    case
        when doi.doi_from_crossref = 1 and grey.grey_lit = 0 then true
        else false
    end as peer_reviewed
from ${stats_db_name}.result r
left join ${stats_db_name}.indi_pub_doi_from_crossref doi on doi.id = r.id
left join ${stats_db_name}.indi_pub_grey_lit grey on grey.id = r.id;
|
||||
|
||||
-- Green OA:
-- Flag taken from indi_pub_green_oa (green_oa = 1); results without an
-- indicator row are flagged false.
create table ${stats_db_name}.result_greenoa as
select
    r.id,
    case
        when green.green_oa = 1 then true
        else false
    end as green
from ${stats_db_name}.result r
left join ${stats_db_name}.indi_pub_green_oa green on green.id = r.id;
|
||||
|
||||
-- GOLD OA:
-- Flag taken from indi_pub_gold_oa (gold_oa = 1); results without an
-- indicator row are flagged false.
create table ${stats_db_name}.result_gold as
select
    r.id,
    case
        when gold.gold_oa = 1 then true
        else false
    end as gold
from ${stats_db_name}.result r
left join ${stats_db_name}.indi_pub_gold_oa gold on gold.id = r.id;
|
@ -1,259 +1,561 @@
|
||||
-- Distinct-result counts for results affiliated with organizations located in
-- European countries, per combination of indicator flags, result type and country.
create table TARGET.result_affiliated_country stored as parquet as
select
    count(distinct r.id) as total,
    r.green,
    r.gold,
    case when rl.type is not null then true else false end as licence,
    case when pids.pid is not null then true else false end as pid,
    case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
    r.peer_reviewed,
    r.type,
    c.code as ccode,
    c.name as cname
from SOURCE.result r
inner join SOURCE.result_organization ro on ro.id = r.id
inner join SOURCE.organization o on o.id = ro.organization
inner join SOURCE.country c on c.code = o.country and c.continent_name = 'Europe'
left join SOURCE.result_licenses rl on rl.id = r.id
left join SOURCE.result_pids pids on pids.id = r.id
group by
    r.green, r.gold, licence, pid, oa,
    r.peer_reviewed, r.type,
    c.code, c.name;
|
||||
-- One row per result with a boolean cc_licence flag: true when at least one of
-- the result's licences has a normalized form starting with 'cc-' (case-insensitive),
-- false otherwise (including results with no licence rows).
create table ${observatory_db_name}.result_cc_licence stored as parquet as
select
    r.id,
    coalesce(cc.count, 0) > 0 as cc_licence
from ${stats_db_name}.result r
left join (
    select
        rl.id,
        sum(case when lower(norm.normalized) like 'cc-%' then 1 else 0 end) as count
    from ${stats_db_name}.result_licenses rl
    left join ${stats_db_name}.licenses_normalized norm on rl.type = norm.license
    group by rl.id
) cc on cc.id = r.id;
|
||||
|
||||
-- Distinct-result counts for results affiliated with organizations located in
-- European countries, per combination of indicator flags, result type and year.
create table TARGET.result_affiliated_year stored as parquet as
select
    count(distinct r.id) as total,
    r.green,
    r.gold,
    case when rl.type is not null then true else false end as licence,
    case when pids.pid is not null then true else false end as pid,
    case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
    r.peer_reviewed,
    r.type,
    r.year
from SOURCE.result r
inner join SOURCE.result_organization ro on ro.id = r.id
inner join SOURCE.organization o on o.id = ro.organization
inner join SOURCE.country c on c.code = o.country and c.continent_name = 'Europe'
left join SOURCE.result_licenses rl on rl.id = r.id
left join SOURCE.result_pids pids on pids.id = r.id
group by
    r.green, r.gold, licence, pid, oa,
    r.peer_reviewed, r.type,
    r.year;
|
||||
-- Distinct-result counts for results affiliated with organizations located in
-- European countries, per combination of indicator flags, result type and country.
-- NOTE(review): multiple_projects / multiple_funders are NULL (not false) when the
-- left join finds no count row for a result; confirm this is intended.
create table ${observatory_db_name}.result_affiliated_country stored as parquet as
select
    count(distinct r.id) as total,
    r.green,
    r.gold,
    case when rl.type is not null then true else false end as licence,
    case when pids.pid is not null then true else false end as pid,
    case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
    r.peer_reviewed,
    rln.cc_licence,
    r.abstract as abstract,
    r.authors > 1 as multiple_authors,
    rpc.count > 1 as multiple_projects,
    rfc.count > 1 as multiple_funders,
    r.type,
    c.code as ccode,
    c.name as cname
from ${stats_db_name}.result r
inner join ${stats_db_name}.result_organization ro on ro.id = r.id
inner join ${stats_db_name}.organization o on o.id = ro.organization
inner join ${stats_db_name}.country c on c.code = o.country and c.continent_name = 'Europe'
left join ${stats_db_name}.result_licenses rl on rl.id = r.id
left join ${stats_db_name}.result_pids pids on pids.id = r.id
left join ${observatory_db_name}.result_cc_licence rln on rln.id = r.id
left join ${stats_db_name}.result_projectcount rpc on rpc.id = r.id
left join ${stats_db_name}.result_fundercount rfc on rfc.id = r.id
group by
    r.green, r.gold,
    case when rl.type is not null then true else false end,
    case when pids.pid is not null then true else false end,
    case when r.access_mode in ('Open Access', 'Open Source') then true else false end,
    r.peer_reviewed, r.type, r.abstract, rln.cc_licence,
    r.authors > 1, rpc.count > 1, rfc.count > 1,
    c.code, c.name;
|
||||
|
||||
-- Distinct-result counts for results affiliated with organizations located in
-- European countries, per combination of indicator flags, result type, year and country.
create table TARGET.result_affiliated_year_country stored as parquet as
select
    count(distinct r.id) as total,
    r.green,
    r.gold,
    case when rl.type is not null then true else false end as licence,
    case when pids.pid is not null then true else false end as pid,
    case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
    r.peer_reviewed,
    r.type,
    r.year,
    c.code as ccode,
    c.name as cname
from SOURCE.result r
inner join SOURCE.result_organization ro on ro.id = r.id
inner join SOURCE.organization o on o.id = ro.organization
inner join SOURCE.country c on c.code = o.country and c.continent_name = 'Europe'
left join SOURCE.result_licenses rl on rl.id = r.id
left join SOURCE.result_pids pids on pids.id = r.id
group by
    r.green, r.gold, licence, pid, oa,
    r.peer_reviewed, r.type,
    r.year, c.code, c.name;
|
||||
-- Distinct-result counts for results affiliated with organizations located in
-- European countries, per combination of indicator flags, result type and year.
-- NOTE(review): multiple_projects / multiple_funders are NULL (not false) when the
-- left join finds no count row for a result; confirm this is intended.
create table ${observatory_db_name}.result_affiliated_year stored as parquet as
select
    count(distinct r.id) as total,
    r.green,
    r.gold,
    case when rl.type is not null then true else false end as licence,
    case when pids.pid is not null then true else false end as pid,
    case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
    r.peer_reviewed,
    rln.cc_licence,
    r.abstract as abstract,
    r.authors > 1 as multiple_authors,
    rpc.count > 1 as multiple_projects,
    rfc.count > 1 as multiple_funders,
    r.type,
    r.year
from ${stats_db_name}.result r
inner join ${stats_db_name}.result_organization ro on ro.id = r.id
inner join ${stats_db_name}.organization o on o.id = ro.organization
inner join ${stats_db_name}.country c on c.code = o.country and c.continent_name = 'Europe'
left join ${stats_db_name}.result_licenses rl on rl.id = r.id
left join ${stats_db_name}.result_pids pids on pids.id = r.id
left join ${observatory_db_name}.result_cc_licence rln on rln.id = r.id
left join ${stats_db_name}.result_projectcount rpc on rpc.id = r.id
left join ${stats_db_name}.result_fundercount rfc on rfc.id = r.id
group by
    r.green, r.gold,
    case when rl.type is not null then true else false end,
    case when pids.pid is not null then true else false end,
    case when r.access_mode in ('Open Access', 'Open Source') then true else false end,
    r.peer_reviewed, r.type, r.abstract, rln.cc_licence,
    r.authors > 1, rpc.count > 1, rfc.count > 1,
    r.year;
|
||||
|
||||
-- Distinct-result counts for results affiliated with organizations located in
-- European countries, per combination of indicator flags, result type and
-- datasource name (NULL when a result has no datasource row).
create table TARGET.result_affiliated_datasource stored as parquet as
select
    count(distinct r.id) as total,
    r.green,
    r.gold,
    case when rl.type is not null then true else false end as licence,
    case when pids.pid is not null then true else false end as pid,
    case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
    r.peer_reviewed,
    r.type,
    d.name as dname
from SOURCE.result r
inner join SOURCE.result_organization ro on ro.id = r.id
inner join SOURCE.organization o on o.id = ro.organization
inner join SOURCE.country c on c.code = o.country and c.continent_name = 'Europe'
left join SOURCE.result_datasources rd on rd.id = r.id
left join SOURCE.datasource d on d.id = rd.datasource
left join SOURCE.result_licenses rl on rl.id = r.id
left join SOURCE.result_pids pids on pids.id = r.id
group by
    r.green, r.gold, licence, pid, oa,
    r.peer_reviewed, r.type,
    d.name;
|
||||
-- Distinct-result counts for results affiliated with organizations located in
-- European countries, per combination of indicator flags, result type, year and country.
-- NOTE(review): multiple_projects / multiple_funders are NULL (not false) when the
-- left join finds no count row for a result; confirm this is intended.
create table ${observatory_db_name}.result_affiliated_year_country stored as parquet as
select
    count(distinct r.id) as total,
    r.green,
    r.gold,
    case when rl.type is not null then true else false end as licence,
    case when pids.pid is not null then true else false end as pid,
    case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
    r.peer_reviewed,
    rln.cc_licence,
    r.abstract as abstract,
    r.authors > 1 as multiple_authors,
    rpc.count > 1 as multiple_projects,
    rfc.count > 1 as multiple_funders,
    r.type,
    r.year,
    c.code as ccode,
    c.name as cname
from ${stats_db_name}.result r
inner join ${stats_db_name}.result_organization ro on ro.id = r.id
inner join ${stats_db_name}.organization o on o.id = ro.organization
inner join ${stats_db_name}.country c on c.code = o.country and c.continent_name = 'Europe'
left join ${stats_db_name}.result_licenses rl on rl.id = r.id
left join ${stats_db_name}.result_pids pids on pids.id = r.id
left join ${observatory_db_name}.result_cc_licence rln on rln.id = r.id
left join ${stats_db_name}.result_projectcount rpc on rpc.id = r.id
left join ${stats_db_name}.result_fundercount rfc on rfc.id = r.id
group by
    r.green, r.gold,
    case when rl.type is not null then true else false end,
    case when pids.pid is not null then true else false end,
    case when r.access_mode in ('Open Access', 'Open Source') then true else false end,
    r.peer_reviewed, r.type, r.abstract, rln.cc_licence,
    r.authors > 1, rpc.count > 1, rfc.count > 1,
    r.year, c.code, c.name;
|
||||
|
||||
-- Distinct-result counts for results affiliated with organizations located in
-- European countries, per combination of indicator flags, result type,
-- datasource name and country.
create table TARGET.result_affiliated_datasource_country stored as parquet as
select
    count(distinct r.id) as total,
    r.green,
    r.gold,
    case when rl.type is not null then true else false end as licence,
    case when pids.pid is not null then true else false end as pid,
    case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
    r.peer_reviewed,
    r.type,
    d.name as dname,
    c.code as ccode,
    c.name as cname
from SOURCE.result r
inner join SOURCE.result_organization ro on ro.id = r.id
inner join SOURCE.organization o on o.id = ro.organization
inner join SOURCE.country c on c.code = o.country and c.continent_name = 'Europe'
left join SOURCE.result_datasources rd on rd.id = r.id
left join SOURCE.datasource d on d.id = rd.datasource
left join SOURCE.result_licenses rl on rl.id = r.id
left join SOURCE.result_pids pids on pids.id = r.id
group by
    r.green, r.gold, licence, pid, oa,
    r.peer_reviewed, r.type,
    d.name, c.code, c.name;
|
||||
-- Distinct-result counts for results affiliated with organizations located in
-- European countries, per combination of indicator flags, result type and
-- datasource name (NULL when a result has no datasource row).
-- NOTE(review): multiple_projects / multiple_funders are NULL (not false) when the
-- left join finds no count row for a result; confirm this is intended.
create table ${observatory_db_name}.result_affiliated_datasource stored as parquet as
select
    count(distinct r.id) as total,
    r.green,
    r.gold,
    case when rl.type is not null then true else false end as licence,
    case when pids.pid is not null then true else false end as pid,
    case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
    r.peer_reviewed,
    rln.cc_licence,
    r.abstract as abstract,
    r.authors > 1 as multiple_authors,
    rpc.count > 1 as multiple_projects,
    rfc.count > 1 as multiple_funders,
    r.type,
    d.name as dname
from ${stats_db_name}.result r
inner join ${stats_db_name}.result_organization ro on ro.id = r.id
inner join ${stats_db_name}.organization o on o.id = ro.organization
inner join ${stats_db_name}.country c on c.code = o.country and c.continent_name = 'Europe'
left join ${stats_db_name}.result_datasources rd on rd.id = r.id
left join ${stats_db_name}.datasource d on d.id = rd.datasource
left join ${stats_db_name}.result_licenses rl on rl.id = r.id
left join ${stats_db_name}.result_pids pids on pids.id = r.id
left join ${observatory_db_name}.result_cc_licence rln on rln.id = r.id
left join ${stats_db_name}.result_projectcount rpc on rpc.id = r.id
left join ${stats_db_name}.result_fundercount rfc on rfc.id = r.id
group by
    r.green, r.gold,
    case when rl.type is not null then true else false end,
    case when pids.pid is not null then true else false end,
    case when r.access_mode in ('Open Access', 'Open Source') then true else false end,
    r.peer_reviewed, r.type, r.abstract, rln.cc_licence,
    r.authors > 1, rpc.count > 1, rfc.count > 1,
    d.name;
|
||||
|
||||
-- Distinct-result counts for results affiliated with organizations located in
-- European countries, per combination of indicator flags, result type and
-- organization name.
create table TARGET.result_affiliated_organization stored as parquet as
select
    count(distinct r.id) as total,
    r.green,
    r.gold,
    case when rl.type is not null then true else false end as licence,
    case when pids.pid is not null then true else false end as pid,
    case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
    r.peer_reviewed,
    r.type,
    o.name as oname
from SOURCE.result r
inner join SOURCE.result_organization ro on ro.id = r.id
inner join SOURCE.organization o on o.id = ro.organization
inner join SOURCE.country c on c.code = o.country and c.continent_name = 'Europe'
left join SOURCE.result_licenses rl on rl.id = r.id
left join SOURCE.result_pids pids on pids.id = r.id
group by
    r.green, r.gold, licence, pid, oa,
    r.peer_reviewed, r.type,
    o.name;
|
||||
-- Distinct-result counts for results affiliated with organizations located in
-- European countries, per combination of indicator flags, result type,
-- datasource name and country.
-- NOTE(review): multiple_projects / multiple_funders are NULL (not false) when the
-- left join finds no count row for a result; confirm this is intended.
create table ${observatory_db_name}.result_affiliated_datasource_country stored as parquet as
select
    count(distinct r.id) as total,
    r.green,
    r.gold,
    case when rl.type is not null then true else false end as licence,
    case when pids.pid is not null then true else false end as pid,
    case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
    r.peer_reviewed,
    rln.cc_licence,
    r.abstract as abstract,
    r.authors > 1 as multiple_authors,
    rpc.count > 1 as multiple_projects,
    rfc.count > 1 as multiple_funders,
    r.type,
    d.name as dname,
    c.code as ccode,
    c.name as cname
from ${stats_db_name}.result r
inner join ${stats_db_name}.result_organization ro on ro.id = r.id
inner join ${stats_db_name}.organization o on o.id = ro.organization
inner join ${stats_db_name}.country c on c.code = o.country and c.continent_name = 'Europe'
left join ${stats_db_name}.result_datasources rd on rd.id = r.id
left join ${stats_db_name}.datasource d on d.id = rd.datasource
left join ${stats_db_name}.result_licenses rl on rl.id = r.id
left join ${stats_db_name}.result_pids pids on pids.id = r.id
left join ${observatory_db_name}.result_cc_licence rln on rln.id = r.id
left join ${stats_db_name}.result_projectcount rpc on rpc.id = r.id
left join ${stats_db_name}.result_fundercount rfc on rfc.id = r.id
group by
    r.green, r.gold,
    case when rl.type is not null then true else false end,
    case when pids.pid is not null then true else false end,
    case when r.access_mode in ('Open Access', 'Open Source') then true else false end,
    r.peer_reviewed, r.type, r.abstract, rln.cc_licence,
    r.authors > 1, rpc.count > 1, rfc.count > 1,
    d.name, c.code, c.name;
|
||||
|
||||
-- Distinct-result counts for results affiliated with organizations located in
-- European countries, per combination of indicator flags, result type,
-- organization name and country.
create table TARGET.result_affiliated_organization_country stored as parquet as
select
    count(distinct r.id) as total,
    r.green,
    r.gold,
    case when rl.type is not null then true else false end as licence,
    case when pids.pid is not null then true else false end as pid,
    case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
    r.peer_reviewed,
    r.type,
    o.name as oname,
    c.code as ccode,
    c.name as cname
from SOURCE.result r
inner join SOURCE.result_organization ro on ro.id = r.id
inner join SOURCE.organization o on o.id = ro.organization
inner join SOURCE.country c on c.code = o.country and c.continent_name = 'Europe'
left join SOURCE.result_licenses rl on rl.id = r.id
left join SOURCE.result_pids pids on pids.id = r.id
group by
    r.green, r.gold, licence, pid, oa,
    r.peer_reviewed, r.type,
    o.name, c.code, c.name;
|
||||
-- Distinct-result counts for results affiliated with organizations located in
-- European countries, per combination of indicator flags, result type and
-- organization name.
-- NOTE(review): multiple_projects / multiple_funders are NULL (not false) when the
-- left join finds no count row for a result; confirm this is intended.
create table ${observatory_db_name}.result_affiliated_organization stored as parquet as
select
    count(distinct r.id) as total,
    r.green,
    r.gold,
    case when rl.type is not null then true else false end as licence,
    case when pids.pid is not null then true else false end as pid,
    case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
    r.peer_reviewed,
    rln.cc_licence,
    r.abstract as abstract,
    r.authors > 1 as multiple_authors,
    rpc.count > 1 as multiple_projects,
    rfc.count > 1 as multiple_funders,
    r.type,
    o.name as oname
from ${stats_db_name}.result r
inner join ${stats_db_name}.result_organization ro on ro.id = r.id
inner join ${stats_db_name}.organization o on o.id = ro.organization
inner join ${stats_db_name}.country c on c.code = o.country and c.continent_name = 'Europe'
left join ${stats_db_name}.result_licenses rl on rl.id = r.id
left join ${stats_db_name}.result_pids pids on pids.id = r.id
left join ${observatory_db_name}.result_cc_licence rln on rln.id = r.id
left join ${stats_db_name}.result_projectcount rpc on rpc.id = r.id
left join ${stats_db_name}.result_fundercount rfc on rfc.id = r.id
group by
    r.green, r.gold,
    case when rl.type is not null then true else false end,
    case when pids.pid is not null then true else false end,
    case when r.access_mode in ('Open Access', 'Open Source') then true else false end,
    r.peer_reviewed, r.type, r.abstract, rln.cc_licence,
    r.authors > 1, rpc.count > 1, rfc.count > 1,
    o.name;
|
||||
|
||||
-- Distinct-result counts for results affiliated with organizations located in
-- European countries and linked to at least one project, per combination of
-- indicator flags, result type and project funder.
create table TARGET.result_affiliated_funder stored as parquet as
select
    count(distinct r.id) as total,
    r.green,
    r.gold,
    case when rl.type is not null then true else false end as licence,
    case when pids.pid is not null then true else false end as pid,
    case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
    r.peer_reviewed,
    r.type,
    p.funder as pfunder
from SOURCE.result r
inner join SOURCE.result_organization ro on ro.id = r.id
inner join SOURCE.organization o on o.id = ro.organization
inner join SOURCE.country c on c.code = o.country and c.continent_name = 'Europe'
inner join SOURCE.result_projects rp on rp.id = r.id
inner join SOURCE.project p on p.id = rp.project
left join SOURCE.result_licenses rl on rl.id = r.id
left join SOURCE.result_pids pids on pids.id = r.id
group by
    r.green, r.gold, licence, pid, oa,
    r.peer_reviewed, r.type,
    p.funder;
|
||||
-- Distinct-result counts for results affiliated with organizations located in
-- European countries, per combination of indicator flags, result type,
-- organization name and country.
-- NOTE(review): multiple_projects / multiple_funders are NULL (not false) when the
-- left join finds no count row for a result; confirm this is intended.
create table ${observatory_db_name}.result_affiliated_organization_country stored as parquet as
select
    count(distinct r.id) as total,
    r.green,
    r.gold,
    case when rl.type is not null then true else false end as licence,
    case when pids.pid is not null then true else false end as pid,
    case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
    r.peer_reviewed,
    rln.cc_licence,
    r.abstract as abstract,
    r.authors > 1 as multiple_authors,
    rpc.count > 1 as multiple_projects,
    rfc.count > 1 as multiple_funders,
    r.type,
    o.name as oname,
    c.code as ccode,
    c.name as cname
from ${stats_db_name}.result r
inner join ${stats_db_name}.result_organization ro on ro.id = r.id
inner join ${stats_db_name}.organization o on o.id = ro.organization
inner join ${stats_db_name}.country c on c.code = o.country and c.continent_name = 'Europe'
left join ${stats_db_name}.result_licenses rl on rl.id = r.id
left join ${stats_db_name}.result_pids pids on pids.id = r.id
left join ${observatory_db_name}.result_cc_licence rln on rln.id = r.id
left join ${stats_db_name}.result_projectcount rpc on rpc.id = r.id
left join ${stats_db_name}.result_fundercount rfc on rfc.id = r.id
group by
    r.green, r.gold,
    case when rl.type is not null then true else false end,
    case when pids.pid is not null then true else false end,
    case when r.access_mode in ('Open Access', 'Open Source') then true else false end,
    r.peer_reviewed, r.type, r.abstract, rln.cc_licence,
    r.authors > 1, rpc.count > 1, rfc.count > 1,
    o.name, c.code, c.name;
|
||||
|
||||
-- Distinct-result counts for results affiliated with organizations located in
-- European countries and linked to at least one project, per combination of
-- indicator flags, result type, project funder and country.
create table TARGET.result_affiliated_funder_country stored as parquet as
select
    count(distinct r.id) as total,
    r.green,
    r.gold,
    case when rl.type is not null then true else false end as licence,
    case when pids.pid is not null then true else false end as pid,
    case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
    r.peer_reviewed,
    r.type,
    p.funder as pfunder,
    c.code as ccode,
    c.name as cname
from SOURCE.result r
inner join SOURCE.result_organization ro on ro.id = r.id
inner join SOURCE.organization o on o.id = ro.organization
inner join SOURCE.country c on c.code = o.country and c.continent_name = 'Europe'
inner join SOURCE.result_projects rp on rp.id = r.id
inner join SOURCE.project p on p.id = rp.project
left join SOURCE.result_licenses rl on rl.id = r.id
left join SOURCE.result_pids pids on pids.id = r.id
group by
    r.green, r.gold, licence, pid, oa,
    r.peer_reviewed, r.type,
    p.funder, c.code, c.name;
|
||||
-- Distinct-result counts for results affiliated with organizations located in
-- European countries and linked to at least one project, per combination of
-- indicator flags, result type and project funder.
-- NOTE(review): multiple_projects / multiple_funders are NULL (not false) when the
-- left join finds no count row for a result; confirm this is intended.
create table ${observatory_db_name}.result_affiliated_funder stored as parquet as
select
    count(distinct r.id) as total,
    r.green,
    r.gold,
    case when rl.type is not null then true else false end as licence,
    case when pids.pid is not null then true else false end as pid,
    case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
    r.peer_reviewed,
    rln.cc_licence,
    r.abstract as abstract,
    r.authors > 1 as multiple_authors,
    rpc.count > 1 as multiple_projects,
    rfc.count > 1 as multiple_funders,
    r.type,
    p.funder as pfunder
from ${stats_db_name}.result r
inner join ${stats_db_name}.result_organization ro on ro.id = r.id
inner join ${stats_db_name}.organization o on o.id = ro.organization
inner join ${stats_db_name}.country c on c.code = o.country and c.continent_name = 'Europe'
inner join ${stats_db_name}.result_projects rp on rp.id = r.id
inner join ${stats_db_name}.project p on p.id = rp.project
left join ${stats_db_name}.result_licenses rl on rl.id = r.id
left join ${stats_db_name}.result_pids pids on pids.id = r.id
left join ${observatory_db_name}.result_cc_licence rln on rln.id = r.id
left join ${stats_db_name}.result_projectcount rpc on rpc.id = r.id
left join ${stats_db_name}.result_fundercount rfc on rfc.id = r.id
group by
    r.green, r.gold,
    case when rl.type is not null then true else false end,
    case when pids.pid is not null then true else false end,
    case when r.access_mode in ('Open Access', 'Open Source') then true else false end,
    r.peer_reviewed, r.type, r.abstract, rln.cc_licence,
    r.authors > 1, rpc.count > 1, rfc.count > 1,
    p.funder;
|
||||
|
||||
-- Distinct-result counts for results found in repository-type datasources whose
-- owning organization is located in a European country, per combination of
-- indicator flags, result type and country.
create table TARGET.result_deposited_country stored as parquet as
select
    count(distinct r.id) as total,
    r.green,
    r.gold,
    case when rl.type is not null then true else false end as licence,
    case when pids.pid is not null then true else false end as pid,
    case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
    r.peer_reviewed,
    r.type,
    c.code as ccode,
    c.name as cname
from SOURCE.result r
inner join SOURCE.result_datasources rd on rd.id = r.id
inner join SOURCE.datasource d
    on d.id = rd.datasource
    and d.type in ('Institutional Repository', 'Data Repository', 'Repository', 'Publication Repository')
inner join SOURCE.datasource_organizations dor on dor.id = d.id
inner join SOURCE.organization o on o.id = dor.organization
inner join SOURCE.country c on c.code = o.country and c.continent_name = 'Europe'
left join SOURCE.result_licenses rl on rl.id = r.id
left join SOURCE.result_pids pids on pids.id = r.id
group by
    r.green, r.gold, licence, pid, oa,
    r.peer_reviewed, r.type,
    c.code, c.name;
|
||||
-- Distinct-result counts for results affiliated with organizations located in
-- European countries and linked to at least one project, per combination of
-- indicator flags, result type, project funder and country.
-- NOTE(review): multiple_projects / multiple_funders are NULL (not false) when the
-- left join finds no count row for a result; confirm this is intended.
create table ${observatory_db_name}.result_affiliated_funder_country stored as parquet as
select
    count(distinct r.id) as total,
    r.green,
    r.gold,
    case when rl.type is not null then true else false end as licence,
    case when pids.pid is not null then true else false end as pid,
    case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
    r.peer_reviewed,
    rln.cc_licence,
    r.abstract as abstract,
    r.authors > 1 as multiple_authors,
    rpc.count > 1 as multiple_projects,
    rfc.count > 1 as multiple_funders,
    r.type,
    p.funder as pfunder,
    c.code as ccode,
    c.name as cname
from ${stats_db_name}.result r
inner join ${stats_db_name}.result_organization ro on ro.id = r.id
inner join ${stats_db_name}.organization o on o.id = ro.organization
inner join ${stats_db_name}.country c on c.code = o.country and c.continent_name = 'Europe'
inner join ${stats_db_name}.result_projects rp on rp.id = r.id
inner join ${stats_db_name}.project p on p.id = rp.project
left join ${stats_db_name}.result_licenses rl on rl.id = r.id
left join ${stats_db_name}.result_pids pids on pids.id = r.id
left join ${observatory_db_name}.result_cc_licence rln on rln.id = r.id
left join ${stats_db_name}.result_projectcount rpc on rpc.id = r.id
left join ${stats_db_name}.result_fundercount rfc on rfc.id = r.id
group by
    r.green, r.gold,
    case when rl.type is not null then true else false end,
    case when pids.pid is not null then true else false end,
    case when r.access_mode in ('Open Access', 'Open Source') then true else false end,
    r.peer_reviewed, r.type, r.abstract, rln.cc_licence,
    r.authors > 1, rpc.count > 1, rfc.count > 1,
    p.funder, c.code, c.name;
|
||||
|
||||
-- Distinct-result counts for results found in repository-type datasources whose
-- owning organization is located in a European country, per combination of
-- indicator flags, result type and year.
create table TARGET.result_deposited_year stored as parquet as
select
    count(distinct r.id) as total,
    r.green,
    r.gold,
    case when rl.type is not null then true else false end as licence,
    case when pids.pid is not null then true else false end as pid,
    case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
    r.peer_reviewed,
    r.type,
    r.year
from SOURCE.result r
inner join SOURCE.result_datasources rd on rd.id = r.id
inner join SOURCE.datasource d
    on d.id = rd.datasource
    and d.type in ('Institutional Repository', 'Data Repository', 'Repository', 'Publication Repository')
inner join SOURCE.datasource_organizations dor on dor.id = d.id
inner join SOURCE.organization o on o.id = dor.organization
inner join SOURCE.country c on c.code = o.country and c.continent_name = 'Europe'
left join SOURCE.result_licenses rl on rl.id = r.id
left join SOURCE.result_pids pids on pids.id = r.id
group by
    r.green, r.gold, licence, pid, oa,
    r.peer_reviewed, r.type,
    r.year;
|
||||
-- Distinct-result counts for results found in repository-type datasources whose
-- owning organization is located in a European country, per combination of
-- indicator flags, result type and country.
-- NOTE(review): multiple_projects / multiple_funders are NULL (not false) when the
-- left join finds no count row for a result; confirm this is intended.
create table ${observatory_db_name}.result_deposited_country stored as parquet as
select
    count(distinct r.id) as total,
    r.green,
    r.gold,
    case when rl.type is not null then true else false end as licence,
    case when pids.pid is not null then true else false end as pid,
    case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
    r.peer_reviewed,
    rln.cc_licence,
    r.abstract as abstract,
    r.authors > 1 as multiple_authors,
    rpc.count > 1 as multiple_projects,
    rfc.count > 1 as multiple_funders,
    r.type,
    c.code as ccode,
    c.name as cname
from ${stats_db_name}.result r
inner join ${stats_db_name}.result_datasources rd on rd.id = r.id
inner join ${stats_db_name}.datasource d
    on d.id = rd.datasource
    and d.type in ('Institutional Repository', 'Data Repository', 'Repository', 'Publication Repository')
inner join ${stats_db_name}.datasource_organizations dor on dor.id = d.id
inner join ${stats_db_name}.organization o on o.id = dor.organization
inner join ${stats_db_name}.country c on c.code = o.country and c.continent_name = 'Europe'
left join ${stats_db_name}.result_licenses rl on rl.id = r.id
left join ${stats_db_name}.result_pids pids on pids.id = r.id
left join ${observatory_db_name}.result_cc_licence rln on rln.id = r.id
left join ${stats_db_name}.result_projectcount rpc on rpc.id = r.id
left join ${stats_db_name}.result_fundercount rfc on rfc.id = r.id
group by
    r.green, r.gold,
    case when rl.type is not null then true else false end,
    case when pids.pid is not null then true else false end,
    case when r.access_mode in ('Open Access', 'Open Source') then true else false end,
    r.peer_reviewed, r.type, r.abstract, rln.cc_licence,
    r.authors > 1, rpc.count > 1, rfc.count > 1,
    c.code, c.name;
|
||||
|
||||
-- Distinct result counts per publication year and country (code and name).
-- Only results linked through result_datasources to a datasource of one of the
-- listed repository types are counted, and only when that datasource belongs to
-- an organization whose country has continent_name = Europe.
-- GROUP BY repeats the select expressions instead of the aliases licence, pid
-- and oa: pid is also a column of result_pids and alias lookup in GROUP BY is
-- engine-specific.
create table TARGET.result_deposited_year_country stored as parquet as
select
    count(distinct r.id) as total,
    r.green,
    r.gold,
    case when rl.type is not null then true else false end as licence,
    case when pids.pid is not null then true else false end as pid,
    case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
    r.peer_reviewed,
    r.type,
    r.year,
    c.code as ccode,
    c.name as cname
from SOURCE.result r
join SOURCE.result_datasources rd on rd.id = r.id
join SOURCE.datasource d
    on d.id = rd.datasource
    and d.type in ('Institutional Repository', 'Data Repository', 'Repository', 'Publication Repository')
join SOURCE.datasource_organizations dor on dor.id = d.id
join SOURCE.organization o on o.id = dor.organization
join SOURCE.country c on c.code = o.country and c.continent_name = 'Europe'
left outer join SOURCE.result_licenses rl on rl.id = r.id
left outer join SOURCE.result_pids pids on pids.id = r.id
group by
    r.green,
    r.gold,
    case when rl.type is not null then true else false end,
    case when pids.pid is not null then true else false end,
    case when r.access_mode in ('Open Access', 'Open Source') then true else false end,
    r.peer_reviewed,
    r.type,
    r.year,
    c.code,
    c.name;
|
||||
-- Distinct result counts per publication year.
-- Inner joins keep only results linked to a datasource of one of the listed
-- repository types whose organization is in a country with
-- continent_name = Europe. The country is a filter only and is not output.
-- The left joins only add attributes: licence and pid are true when a matching
-- row exists, multiple_projects and multiple_funders are NULL when the result
-- has no row in result_projectcount or result_fundercount.
create table ${observatory_db_name}.result_deposited_year stored as parquet as
select
    count(distinct r.id) as total,
    r.green,
    r.gold,
    rl.type is not null as licence,
    pids.pid is not null as pid,
    case
        when r.access_mode in ('Open Access', 'Open Source') then true
        else false
    end as oa,
    r.peer_reviewed,
    rln.cc_licence,
    r.abstract as abstract,
    r.authors > 1 as multiple_authors,
    rpc.count > 1 as multiple_projects,
    rfc.count > 1 as multiple_funders,
    r.type,
    r.year
from ${stats_db_name}.result r
inner join ${stats_db_name}.result_datasources rd
    on rd.id = r.id
inner join ${stats_db_name}.datasource d
    on d.id = rd.datasource
    and d.type in ('Institutional Repository', 'Data Repository', 'Repository', 'Publication Repository')
inner join ${stats_db_name}.datasource_organizations dor
    on dor.id = d.id
inner join ${stats_db_name}.organization o
    on o.id = dor.organization
inner join ${stats_db_name}.country c
    on c.code = o.country
    and c.continent_name = 'Europe'
left join ${stats_db_name}.result_licenses rl
    on rl.id = r.id
left join ${stats_db_name}.result_pids pids
    on pids.id = r.id
left join ${observatory_db_name}.result_cc_licence rln
    on rln.id = r.id
left join ${stats_db_name}.result_projectcount rpc
    on rpc.id = r.id
left join ${stats_db_name}.result_fundercount rfc
    on rfc.id = r.id
group by
    r.green,
    r.gold,
    rl.type is not null,
    pids.pid is not null,
    case
        when r.access_mode in ('Open Access', 'Open Source') then true
        else false
    end,
    r.peer_reviewed,
    r.type,
    r.abstract,
    rln.cc_licence,
    r.authors > 1,
    rpc.count > 1,
    rfc.count > 1,
    r.year;
|
||||
|
||||
-- Distinct result counts per datasource name.
-- Only results linked through result_datasources to a datasource of one of the
-- listed repository types are counted, and only when that datasource belongs to
-- an organization whose country has continent_name = Europe.
-- GROUP BY repeats the select expressions instead of the aliases licence, pid
-- and oa: pid is also a column of result_pids and alias lookup in GROUP BY is
-- engine-specific.
create table TARGET.result_deposited_datasource stored as parquet as
select
    count(distinct r.id) as total,
    r.green,
    r.gold,
    case when rl.type is not null then true else false end as licence,
    case when pids.pid is not null then true else false end as pid,
    case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
    r.peer_reviewed,
    r.type,
    d.name as dname
from SOURCE.result r
join SOURCE.result_datasources rd on rd.id = r.id
join SOURCE.datasource d
    on d.id = rd.datasource
    and d.type in ('Institutional Repository', 'Data Repository', 'Repository', 'Publication Repository')
join SOURCE.datasource_organizations dor on dor.id = d.id
join SOURCE.organization o on o.id = dor.organization
join SOURCE.country c on c.code = o.country and c.continent_name = 'Europe'
left outer join SOURCE.result_licenses rl on rl.id = r.id
left outer join SOURCE.result_pids pids on pids.id = r.id
group by
    r.green,
    r.gold,
    case when rl.type is not null then true else false end,
    case when pids.pid is not null then true else false end,
    case when r.access_mode in ('Open Access', 'Open Source') then true else false end,
    r.peer_reviewed,
    r.type,
    d.name;
|
||||
-- Distinct result counts per publication year and country (code and name).
-- Inner joins keep only results linked to a datasource of one of the listed
-- repository types whose organization is in a country with
-- continent_name = Europe.
-- The left joins only add attributes: licence and pid are true when a matching
-- row exists, multiple_projects and multiple_funders are NULL when the result
-- has no row in result_projectcount or result_fundercount.
create table ${observatory_db_name}.result_deposited_year_country stored as parquet as
select
    count(distinct r.id) as total,
    r.green,
    r.gold,
    rl.type is not null as licence,
    pids.pid is not null as pid,
    case
        when r.access_mode in ('Open Access', 'Open Source') then true
        else false
    end as oa,
    r.peer_reviewed,
    rln.cc_licence,
    r.abstract as abstract,
    r.authors > 1 as multiple_authors,
    rpc.count > 1 as multiple_projects,
    rfc.count > 1 as multiple_funders,
    r.type,
    r.year,
    c.code as ccode,
    c.name as cname
from ${stats_db_name}.result r
inner join ${stats_db_name}.result_datasources rd
    on rd.id = r.id
inner join ${stats_db_name}.datasource d
    on d.id = rd.datasource
    and d.type in ('Institutional Repository', 'Data Repository', 'Repository', 'Publication Repository')
inner join ${stats_db_name}.datasource_organizations dor
    on dor.id = d.id
inner join ${stats_db_name}.organization o
    on o.id = dor.organization
inner join ${stats_db_name}.country c
    on c.code = o.country
    and c.continent_name = 'Europe'
left join ${stats_db_name}.result_licenses rl
    on rl.id = r.id
left join ${stats_db_name}.result_pids pids
    on pids.id = r.id
left join ${observatory_db_name}.result_cc_licence rln
    on rln.id = r.id
left join ${stats_db_name}.result_projectcount rpc
    on rpc.id = r.id
left join ${stats_db_name}.result_fundercount rfc
    on rfc.id = r.id
group by
    r.green,
    r.gold,
    rl.type is not null,
    pids.pid is not null,
    case
        when r.access_mode in ('Open Access', 'Open Source') then true
        else false
    end,
    r.peer_reviewed,
    r.type,
    r.abstract,
    rln.cc_licence,
    r.authors > 1,
    rpc.count > 1,
    rfc.count > 1,
    r.year,
    c.code,
    c.name;
|
||||
|
||||
-- Distinct result counts per datasource name and country (code and name).
-- Only results linked through result_datasources to a datasource of one of the
-- listed repository types are counted, and only when that datasource belongs to
-- an organization whose country has continent_name = Europe.
-- GROUP BY repeats the select expressions instead of the aliases licence, pid
-- and oa: pid is also a column of result_pids and alias lookup in GROUP BY is
-- engine-specific.
create table TARGET.result_deposited_datasource_country stored as parquet as
select
    count(distinct r.id) as total,
    r.green,
    r.gold,
    case when rl.type is not null then true else false end as licence,
    case when pids.pid is not null then true else false end as pid,
    case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
    r.peer_reviewed,
    r.type,
    d.name as dname,
    c.code as ccode,
    c.name as cname
from SOURCE.result r
join SOURCE.result_datasources rd on rd.id = r.id
join SOURCE.datasource d
    on d.id = rd.datasource
    and d.type in ('Institutional Repository', 'Data Repository', 'Repository', 'Publication Repository')
join SOURCE.datasource_organizations dor on dor.id = d.id
join SOURCE.organization o on o.id = dor.organization
join SOURCE.country c on c.code = o.country and c.continent_name = 'Europe'
left outer join SOURCE.result_licenses rl on rl.id = r.id
left outer join SOURCE.result_pids pids on pids.id = r.id
group by
    r.green,
    r.gold,
    case when rl.type is not null then true else false end,
    case when pids.pid is not null then true else false end,
    case when r.access_mode in ('Open Access', 'Open Source') then true else false end,
    r.peer_reviewed,
    r.type,
    d.name,
    c.code,
    c.name;
|
||||
-- Distinct result counts per datasource name.
-- Inner joins keep only results linked to a datasource of one of the listed
-- repository types whose organization is in a country with
-- continent_name = Europe. The country is a filter only and is not output.
-- The left joins only add attributes: licence and pid are true when a matching
-- row exists, multiple_projects and multiple_funders are NULL when the result
-- has no row in result_projectcount or result_fundercount.
create table ${observatory_db_name}.result_deposited_datasource stored as parquet as
select
    count(distinct r.id) as total,
    r.green,
    r.gold,
    rl.type is not null as licence,
    pids.pid is not null as pid,
    case
        when r.access_mode in ('Open Access', 'Open Source') then true
        else false
    end as oa,
    r.peer_reviewed,
    rln.cc_licence,
    r.abstract as abstract,
    r.authors > 1 as multiple_authors,
    rpc.count > 1 as multiple_projects,
    rfc.count > 1 as multiple_funders,
    r.type,
    d.name as dname
from ${stats_db_name}.result r
inner join ${stats_db_name}.result_datasources rd
    on rd.id = r.id
inner join ${stats_db_name}.datasource d
    on d.id = rd.datasource
    and d.type in ('Institutional Repository', 'Data Repository', 'Repository', 'Publication Repository')
inner join ${stats_db_name}.datasource_organizations dor
    on dor.id = d.id
inner join ${stats_db_name}.organization o
    on o.id = dor.organization
inner join ${stats_db_name}.country c
    on c.code = o.country
    and c.continent_name = 'Europe'
left join ${stats_db_name}.result_licenses rl
    on rl.id = r.id
left join ${stats_db_name}.result_pids pids
    on pids.id = r.id
left join ${observatory_db_name}.result_cc_licence rln
    on rln.id = r.id
left join ${stats_db_name}.result_projectcount rpc
    on rpc.id = r.id
left join ${stats_db_name}.result_fundercount rfc
    on rfc.id = r.id
group by
    r.green,
    r.gold,
    rl.type is not null,
    pids.pid is not null,
    case
        when r.access_mode in ('Open Access', 'Open Source') then true
        else false
    end,
    r.peer_reviewed,
    r.type,
    r.abstract,
    rln.cc_licence,
    r.authors > 1,
    rpc.count > 1,
    rfc.count > 1,
    d.name;
|
||||
|
||||
-- Distinct result counts per organization name.
-- Only results linked through result_datasources to a datasource of one of the
-- listed repository types are counted, and only when that datasource belongs to
-- an organization whose country has continent_name = Europe.
-- GROUP BY repeats the select expressions instead of the aliases licence, pid
-- and oa: pid is also a column of result_pids and alias lookup in GROUP BY is
-- engine-specific.
create table TARGET.result_deposited_organization stored as parquet as
select
    count(distinct r.id) as total,
    r.green,
    r.gold,
    case when rl.type is not null then true else false end as licence,
    case when pids.pid is not null then true else false end as pid,
    case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
    r.peer_reviewed,
    r.type,
    o.name as oname
from SOURCE.result r
join SOURCE.result_datasources rd on rd.id = r.id
join SOURCE.datasource d
    on d.id = rd.datasource
    and d.type in ('Institutional Repository', 'Data Repository', 'Repository', 'Publication Repository')
join SOURCE.datasource_organizations dor on dor.id = d.id
join SOURCE.organization o on o.id = dor.organization
join SOURCE.country c on c.code = o.country and c.continent_name = 'Europe'
left outer join SOURCE.result_licenses rl on rl.id = r.id
left outer join SOURCE.result_pids pids on pids.id = r.id
group by
    r.green,
    r.gold,
    case when rl.type is not null then true else false end,
    case when pids.pid is not null then true else false end,
    case when r.access_mode in ('Open Access', 'Open Source') then true else false end,
    r.peer_reviewed,
    r.type,
    o.name;
|
||||
-- Distinct result counts per datasource name and country (code and name).
-- Inner joins keep only results linked to a datasource of one of the listed
-- repository types whose organization is in a country with
-- continent_name = Europe.
-- The left joins only add attributes: licence and pid are true when a matching
-- row exists, multiple_projects and multiple_funders are NULL when the result
-- has no row in result_projectcount or result_fundercount.
create table ${observatory_db_name}.result_deposited_datasource_country stored as parquet as
select
    count(distinct r.id) as total,
    r.green,
    r.gold,
    rl.type is not null as licence,
    pids.pid is not null as pid,
    case
        when r.access_mode in ('Open Access', 'Open Source') then true
        else false
    end as oa,
    r.peer_reviewed,
    rln.cc_licence,
    r.abstract as abstract,
    r.authors > 1 as multiple_authors,
    rpc.count > 1 as multiple_projects,
    rfc.count > 1 as multiple_funders,
    r.type,
    d.name as dname,
    c.code as ccode,
    c.name as cname
from ${stats_db_name}.result r
inner join ${stats_db_name}.result_datasources rd
    on rd.id = r.id
inner join ${stats_db_name}.datasource d
    on d.id = rd.datasource
    and d.type in ('Institutional Repository', 'Data Repository', 'Repository', 'Publication Repository')
inner join ${stats_db_name}.datasource_organizations dor
    on dor.id = d.id
inner join ${stats_db_name}.organization o
    on o.id = dor.organization
inner join ${stats_db_name}.country c
    on c.code = o.country
    and c.continent_name = 'Europe'
left join ${stats_db_name}.result_licenses rl
    on rl.id = r.id
left join ${stats_db_name}.result_pids pids
    on pids.id = r.id
left join ${observatory_db_name}.result_cc_licence rln
    on rln.id = r.id
left join ${stats_db_name}.result_projectcount rpc
    on rpc.id = r.id
left join ${stats_db_name}.result_fundercount rfc
    on rfc.id = r.id
group by
    r.green,
    r.gold,
    rl.type is not null,
    pids.pid is not null,
    case
        when r.access_mode in ('Open Access', 'Open Source') then true
        else false
    end,
    r.peer_reviewed,
    r.type,
    r.abstract,
    rln.cc_licence,
    r.authors > 1,
    rpc.count > 1,
    rfc.count > 1,
    d.name,
    c.code,
    c.name;
|
||||
|
||||
-- Distinct result counts per organization name and country (code and name).
-- Only results linked through result_datasources to a datasource of one of the
-- listed repository types are counted, and only when that datasource belongs to
-- an organization whose country has continent_name = Europe.
-- GROUP BY repeats the select expressions instead of the aliases licence, pid
-- and oa: pid is also a column of result_pids and alias lookup in GROUP BY is
-- engine-specific.
create table TARGET.result_deposited_organization_country stored as parquet as
select
    count(distinct r.id) as total,
    r.green,
    r.gold,
    case when rl.type is not null then true else false end as licence,
    case when pids.pid is not null then true else false end as pid,
    case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
    r.peer_reviewed,
    r.type,
    o.name as oname,
    c.code as ccode,
    c.name as cname
from SOURCE.result r
join SOURCE.result_datasources rd on rd.id = r.id
join SOURCE.datasource d
    on d.id = rd.datasource
    and d.type in ('Institutional Repository', 'Data Repository', 'Repository', 'Publication Repository')
join SOURCE.datasource_organizations dor on dor.id = d.id
join SOURCE.organization o on o.id = dor.organization
join SOURCE.country c on c.code = o.country and c.continent_name = 'Europe'
left outer join SOURCE.result_licenses rl on rl.id = r.id
left outer join SOURCE.result_pids pids on pids.id = r.id
group by
    r.green,
    r.gold,
    case when rl.type is not null then true else false end,
    case when pids.pid is not null then true else false end,
    case when r.access_mode in ('Open Access', 'Open Source') then true else false end,
    r.peer_reviewed,
    r.type,
    o.name,
    c.code,
    c.name;
|
||||
-- Distinct result counts per organization name.
-- Inner joins keep only results linked to a datasource of one of the listed
-- repository types whose organization is in a country with
-- continent_name = Europe. The country is a filter only and is not output.
-- The left joins only add attributes: licence and pid are true when a matching
-- row exists, multiple_projects and multiple_funders are NULL when the result
-- has no row in result_projectcount or result_fundercount.
create table ${observatory_db_name}.result_deposited_organization stored as parquet as
select
    count(distinct r.id) as total,
    r.green,
    r.gold,
    rl.type is not null as licence,
    pids.pid is not null as pid,
    case
        when r.access_mode in ('Open Access', 'Open Source') then true
        else false
    end as oa,
    r.peer_reviewed,
    rln.cc_licence,
    r.abstract as abstract,
    r.authors > 1 as multiple_authors,
    rpc.count > 1 as multiple_projects,
    rfc.count > 1 as multiple_funders,
    r.type,
    o.name as oname
from ${stats_db_name}.result r
inner join ${stats_db_name}.result_datasources rd
    on rd.id = r.id
inner join ${stats_db_name}.datasource d
    on d.id = rd.datasource
    and d.type in ('Institutional Repository', 'Data Repository', 'Repository', 'Publication Repository')
inner join ${stats_db_name}.datasource_organizations dor
    on dor.id = d.id
inner join ${stats_db_name}.organization o
    on o.id = dor.organization
inner join ${stats_db_name}.country c
    on c.code = o.country
    and c.continent_name = 'Europe'
left join ${stats_db_name}.result_licenses rl
    on rl.id = r.id
left join ${stats_db_name}.result_pids pids
    on pids.id = r.id
left join ${observatory_db_name}.result_cc_licence rln
    on rln.id = r.id
left join ${stats_db_name}.result_projectcount rpc
    on rpc.id = r.id
left join ${stats_db_name}.result_fundercount rfc
    on rfc.id = r.id
group by
    r.green,
    r.gold,
    rl.type is not null,
    pids.pid is not null,
    case
        when r.access_mode in ('Open Access', 'Open Source') then true
        else false
    end,
    r.peer_reviewed,
    r.type,
    r.abstract,
    rln.cc_licence,
    r.authors > 1,
    rpc.count > 1,
    rfc.count > 1,
    o.name;
|
||||
|
||||
-- Distinct result counts per project funder.
-- Only results linked through result_datasources to a datasource of one of the
-- listed repository types are counted, and only when that datasource belongs to
-- an organization whose country has continent_name = Europe.
-- The inner joins on result_projects and project drop results that are not
-- linked to any project.
-- GROUP BY repeats the select expressions instead of the aliases licence, pid
-- and oa: pid is also a column of result_pids and alias lookup in GROUP BY is
-- engine-specific.
create table TARGET.result_deposited_funder stored as parquet as
select
    count(distinct r.id) as total,
    r.green,
    r.gold,
    case when rl.type is not null then true else false end as licence,
    case when pids.pid is not null then true else false end as pid,
    case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
    r.peer_reviewed,
    r.type,
    p.funder as pfunder
from SOURCE.result r
join SOURCE.result_datasources rd on rd.id = r.id
join SOURCE.datasource d
    on d.id = rd.datasource
    and d.type in ('Institutional Repository', 'Data Repository', 'Repository', 'Publication Repository')
join SOURCE.datasource_organizations dor on dor.id = d.id
join SOURCE.organization o on o.id = dor.organization
join SOURCE.country c on c.code = o.country and c.continent_name = 'Europe'
join SOURCE.result_projects rp on rp.id = r.id
join SOURCE.project p on p.id = rp.project
left outer join SOURCE.result_licenses rl on rl.id = r.id
left outer join SOURCE.result_pids pids on pids.id = r.id
group by
    r.green,
    r.gold,
    case when rl.type is not null then true else false end,
    case when pids.pid is not null then true else false end,
    case when r.access_mode in ('Open Access', 'Open Source') then true else false end,
    r.peer_reviewed,
    r.type,
    p.funder;
|
||||
-- Distinct result counts per organization name and country (code and name).
-- Inner joins keep only results linked to a datasource of one of the listed
-- repository types whose organization is in a country with
-- continent_name = Europe.
-- The left joins only add attributes: licence and pid are true when a matching
-- row exists, multiple_projects and multiple_funders are NULL when the result
-- has no row in result_projectcount or result_fundercount.
create table ${observatory_db_name}.result_deposited_organization_country stored as parquet as
select
    count(distinct r.id) as total,
    r.green,
    r.gold,
    rl.type is not null as licence,
    pids.pid is not null as pid,
    case
        when r.access_mode in ('Open Access', 'Open Source') then true
        else false
    end as oa,
    r.peer_reviewed,
    rln.cc_licence,
    r.abstract as abstract,
    r.authors > 1 as multiple_authors,
    rpc.count > 1 as multiple_projects,
    rfc.count > 1 as multiple_funders,
    r.type,
    o.name as oname,
    c.code as ccode,
    c.name as cname
from ${stats_db_name}.result r
inner join ${stats_db_name}.result_datasources rd
    on rd.id = r.id
inner join ${stats_db_name}.datasource d
    on d.id = rd.datasource
    and d.type in ('Institutional Repository', 'Data Repository', 'Repository', 'Publication Repository')
inner join ${stats_db_name}.datasource_organizations dor
    on dor.id = d.id
inner join ${stats_db_name}.organization o
    on o.id = dor.organization
inner join ${stats_db_name}.country c
    on c.code = o.country
    and c.continent_name = 'Europe'
left join ${stats_db_name}.result_licenses rl
    on rl.id = r.id
left join ${stats_db_name}.result_pids pids
    on pids.id = r.id
left join ${observatory_db_name}.result_cc_licence rln
    on rln.id = r.id
left join ${stats_db_name}.result_projectcount rpc
    on rpc.id = r.id
left join ${stats_db_name}.result_fundercount rfc
    on rfc.id = r.id
group by
    r.green,
    r.gold,
    rl.type is not null,
    pids.pid is not null,
    case
        when r.access_mode in ('Open Access', 'Open Source') then true
        else false
    end,
    r.peer_reviewed,
    r.type,
    r.abstract,
    rln.cc_licence,
    r.authors > 1,
    rpc.count > 1,
    rfc.count > 1,
    o.name,
    c.code,
    c.name;
|
||||
|
||||
-- Distinct result counts per project funder and country (code and name).
-- Only results linked through result_datasources to a datasource of one of the
-- listed repository types are counted, and only when that datasource belongs to
-- an organization whose country has continent_name = Europe.
-- The inner joins on result_projects and project drop results that are not
-- linked to any project.
-- GROUP BY repeats the select expressions instead of the aliases licence, pid
-- and oa: pid is also a column of result_pids and alias lookup in GROUP BY is
-- engine-specific.
create table TARGET.result_deposited_funder_country stored as parquet as
select
    count(distinct r.id) as total,
    r.green,
    r.gold,
    case when rl.type is not null then true else false end as licence,
    case when pids.pid is not null then true else false end as pid,
    case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
    r.peer_reviewed,
    r.type,
    p.funder as pfunder,
    c.code as ccode,
    c.name as cname
from SOURCE.result r
join SOURCE.result_datasources rd on rd.id = r.id
join SOURCE.datasource d
    on d.id = rd.datasource
    and d.type in ('Institutional Repository', 'Data Repository', 'Repository', 'Publication Repository')
join SOURCE.datasource_organizations dor on dor.id = d.id
join SOURCE.organization o on o.id = dor.organization
join SOURCE.country c on c.code = o.country and c.continent_name = 'Europe'
join SOURCE.result_projects rp on rp.id = r.id
join SOURCE.project p on p.id = rp.project
left outer join SOURCE.result_licenses rl on rl.id = r.id
left outer join SOURCE.result_pids pids on pids.id = r.id
group by
    r.green,
    r.gold,
    case when rl.type is not null then true else false end,
    case when pids.pid is not null then true else false end,
    case when r.access_mode in ('Open Access', 'Open Source') then true else false end,
    r.peer_reviewed,
    r.type,
    p.funder,
    c.code,
    c.name;
|
||||
-- Distinct result counts per project funder.
-- Inner joins keep only results linked to a datasource of one of the listed
-- repository types whose organization is in a country with
-- continent_name = Europe, and that are linked to at least one project through
-- result_projects. The country is a filter only and is not output.
-- The left joins only add attributes: licence and pid are true when a matching
-- row exists, multiple_projects and multiple_funders are NULL when the result
-- has no row in result_projectcount or result_fundercount.
create table ${observatory_db_name}.result_deposited_funder stored as parquet as
select
    count(distinct r.id) as total,
    r.green,
    r.gold,
    rl.type is not null as licence,
    pids.pid is not null as pid,
    case
        when r.access_mode in ('Open Access', 'Open Source') then true
        else false
    end as oa,
    r.peer_reviewed,
    rln.cc_licence,
    r.abstract as abstract,
    r.authors > 1 as multiple_authors,
    rpc.count > 1 as multiple_projects,
    rfc.count > 1 as multiple_funders,
    r.type,
    p.funder as pfunder
from ${stats_db_name}.result r
inner join ${stats_db_name}.result_datasources rd
    on rd.id = r.id
inner join ${stats_db_name}.datasource d
    on d.id = rd.datasource
    and d.type in ('Institutional Repository', 'Data Repository', 'Repository', 'Publication Repository')
inner join ${stats_db_name}.datasource_organizations dor
    on dor.id = d.id
inner join ${stats_db_name}.organization o
    on o.id = dor.organization
inner join ${stats_db_name}.country c
    on c.code = o.country
    and c.continent_name = 'Europe'
inner join ${stats_db_name}.result_projects rp
    on rp.id = r.id
inner join ${stats_db_name}.project p
    on p.id = rp.project
left join ${stats_db_name}.result_licenses rl
    on rl.id = r.id
left join ${stats_db_name}.result_pids pids
    on pids.id = r.id
left join ${observatory_db_name}.result_cc_licence rln
    on rln.id = r.id
left join ${stats_db_name}.result_projectcount rpc
    on rpc.id = r.id
left join ${stats_db_name}.result_fundercount rfc
    on rfc.id = r.id
group by
    r.green,
    r.gold,
    rl.type is not null,
    pids.pid is not null,
    case
        when r.access_mode in ('Open Access', 'Open Source') then true
        else false
    end,
    r.peer_reviewed,
    r.type,
    r.abstract,
    rln.cc_licence,
    r.authors > 1,
    rpc.count > 1,
    rfc.count > 1,
    p.funder;
|
||||
|
||||
-- Gather table and column statistics for every aggregate table in TARGET.
-- The statements are independent of each other. They are grouped by
-- dimension, with the affiliated table listed before its deposited counterpart.

-- country
compute stats TARGET.result_affiliated_country;
compute stats TARGET.result_deposited_country;

-- year
compute stats TARGET.result_affiliated_year;
compute stats TARGET.result_deposited_year;
compute stats TARGET.result_affiliated_year_country;
compute stats TARGET.result_deposited_year_country;

-- datasource
compute stats TARGET.result_affiliated_datasource;
compute stats TARGET.result_deposited_datasource;
compute stats TARGET.result_affiliated_datasource_country;
compute stats TARGET.result_deposited_datasource_country;

-- organization
compute stats TARGET.result_affiliated_organization;
compute stats TARGET.result_deposited_organization;
compute stats TARGET.result_affiliated_organization_country;
compute stats TARGET.result_deposited_organization_country;

-- funder
compute stats TARGET.result_affiliated_funder;
compute stats TARGET.result_deposited_funder;
compute stats TARGET.result_affiliated_funder_country;
compute stats TARGET.result_deposited_funder_country;
|
||||
-- Distinct result counts per project funder and country (code and name).
-- Inner joins keep only results linked to a datasource of one of the listed
-- repository types whose organization is in a country with
-- continent_name = Europe, and that are linked to at least one project through
-- result_projects.
-- The left joins only add attributes: licence and pid are true when a matching
-- row exists, multiple_projects and multiple_funders are NULL when the result
-- has no row in result_projectcount or result_fundercount.
create table ${observatory_db_name}.result_deposited_funder_country stored as parquet as
select
    count(distinct r.id) as total,
    r.green,
    r.gold,
    rl.type is not null as licence,
    pids.pid is not null as pid,
    case
        when r.access_mode in ('Open Access', 'Open Source') then true
        else false
    end as oa,
    r.peer_reviewed,
    rln.cc_licence,
    r.abstract as abstract,
    r.authors > 1 as multiple_authors,
    rpc.count > 1 as multiple_projects,
    rfc.count > 1 as multiple_funders,
    r.type,
    p.funder as pfunder,
    c.code as ccode,
    c.name as cname
from ${stats_db_name}.result r
inner join ${stats_db_name}.result_datasources rd
    on rd.id = r.id
inner join ${stats_db_name}.datasource d
    on d.id = rd.datasource
    and d.type in ('Institutional Repository', 'Data Repository', 'Repository', 'Publication Repository')
inner join ${stats_db_name}.datasource_organizations dor
    on dor.id = d.id
inner join ${stats_db_name}.organization o
    on o.id = dor.organization
inner join ${stats_db_name}.country c
    on c.code = o.country
    and c.continent_name = 'Europe'
inner join ${stats_db_name}.result_projects rp
    on rp.id = r.id
inner join ${stats_db_name}.project p
    on p.id = rp.project
left join ${stats_db_name}.result_licenses rl
    on rl.id = r.id
left join ${stats_db_name}.result_pids pids
    on pids.id = r.id
left join ${observatory_db_name}.result_cc_licence rln
    on rln.id = r.id
left join ${stats_db_name}.result_projectcount rpc
    on rpc.id = r.id
left join ${stats_db_name}.result_fundercount rfc
    on rfc.id = r.id
group by
    r.green,
    r.gold,
    rl.type is not null,
    pids.pid is not null,
    case
        when r.access_mode in ('Open Access', 'Open Source') then true
        else false
    end,
    r.peer_reviewed,
    r.type,
    r.abstract,
    rln.cc_licence,
    r.authors > 1,
    rpc.count > 1,
    rfc.count > 1,
    p.funder,
    c.code,
    c.name;
|
Loading…
Reference in New Issue