forked from D-Net/dnet-hadoop
created hive action for observatory queries
This commit is contained in:
parent
8b681dcf1b
commit
421d55265d
|
@ -9,16 +9,7 @@ fi
|
||||||
export SOURCE=$1
|
export SOURCE=$1
|
||||||
export TARGET=$2
|
export TARGET=$2
|
||||||
export SHADOW=$3
|
export SHADOW=$3
|
||||||
export SCRIPT_PATH=$4
|
|
||||||
|
|
||||||
echo "Getting file from " $4
|
|
||||||
hdfs dfs -copyToLocal $4
|
|
||||||
|
|
||||||
echo "Creating observatory database"
|
|
||||||
impala-shell -q "drop database if exists ${TARGET} cascade"
|
|
||||||
impala-shell -q "create database if not exists ${TARGET}"
|
|
||||||
impala-shell -d ${SOURCE} -q "show tables" --delimited | sed "s/\(.*\)/create view ${TARGET}.\1 as select * from ${SOURCE}.\1;/" | impala-shell -f -
|
|
||||||
cat step21-createObservatoryDB.sql | sed s/SOURCE/$1/g | sed s/TARGET/$2/g1 | hive -f -
|
|
||||||
impala-shell -q "invalidate metadata;"
|
impala-shell -q "invalidate metadata;"
|
||||||
impala-shell -d ${TARGET} -q "show tables" --delimited | sed "s/\(.*\)/compute stats ${TARGET}.\1;/" | impala-shell -f -
|
impala-shell -d ${TARGET} -q "show tables" --delimited | sed "s/\(.*\)/compute stats ${TARGET}.\1;/" | impala-shell -f -
|
||||||
echo "Impala shell finished"
|
echo "Impala shell finished"
|
|
@ -0,0 +1,16 @@
|
||||||
|
export PYTHON_EGG_CACHE=/home/$(whoami)/.python-eggs
|
||||||
|
export link_folder=/tmp/impala-shell-python-egg-cache-$(whoami)
|
||||||
|
if ! [ -L $link_folder ]
|
||||||
|
then
|
||||||
|
rm -Rf "$link_folder"
|
||||||
|
ln -sfn ${PYTHON_EGG_CACHE}${link_folder} ${link_folder}
|
||||||
|
fi
|
||||||
|
|
||||||
|
export SOURCE=$1
|
||||||
|
export TARGET=$2
|
||||||
|
export SHADOW=$3
|
||||||
|
|
||||||
|
echo "Creating observatory database"
|
||||||
|
impala-shell -q "drop database if exists ${TARGET} cascade"
|
||||||
|
impala-shell -q "create database if not exists ${TARGET}"
|
||||||
|
impala-shell -d ${SOURCE} -q "show tables" --delimited | sed "s/\(.*\)/create view ${TARGET}.\1 as select * from ${SOURCE}.\1;/" | impala-shell -f -
|
|
@ -1,14 +1,14 @@
|
||||||
create table TARGET.result_cc_licence stored as parquet as
|
create table ${observatory_db_name}.result_cc_licence stored as parquet as
|
||||||
select r.id, coalesce(rln.count, 0) > 0 as cc_licence
|
select r.id, coalesce(rln.count, 0) > 0 as cc_licence
|
||||||
from SOURCE.result r
|
from ${stats_db_name}.result r
|
||||||
left outer join (
|
left outer join (
|
||||||
select rl.id, sum(case when lower(rln.normalized) like 'cc-%' then 1 else 0 end) as count
|
select rl.id, sum(case when lower(rln.normalized) like 'cc-%' then 1 else 0 end) as count
|
||||||
from SOURCE.result_licenses rl
|
from ${stats_db_name}.result_licenses rl
|
||||||
left outer join SOURCE.licenses_normalized rln on rl.type=rln.license
|
left outer join ${stats_db_name}.licenses_normalized rln on rl.type=rln.license
|
||||||
group by rl.id
|
group by rl.id
|
||||||
) rln on rln.id=r.id;
|
) rln on rln.id=r.id;
|
||||||
|
|
||||||
create table TARGET.result_affiliated_country stored as parquet as
|
create table ${observatory_db_name}.result_affiliated_country stored as parquet as
|
||||||
select
|
select
|
||||||
count(distinct r.id) as total,
|
count(distinct r.id) as total,
|
||||||
r.green,
|
r.green,
|
||||||
|
@ -24,20 +24,20 @@ select
|
||||||
rfc.count > 1 as multiple_funders,
|
rfc.count > 1 as multiple_funders,
|
||||||
r.type,
|
r.type,
|
||||||
c.code as ccode, c.name as cname
|
c.code as ccode, c.name as cname
|
||||||
from SOURCE.result r
|
from ${stats_db_name}.result r
|
||||||
join SOURCE.result_organization ro on ro.id=r.id
|
join ${stats_db_name}.result_organization ro on ro.id=r.id
|
||||||
join SOURCE.organization o on o.id=ro.organization
|
join ${stats_db_name}.organization o on o.id=ro.organization
|
||||||
join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
|
join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe'
|
||||||
left outer join SOURCE.result_licenses rl on rl.id=r.id
|
left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id
|
||||||
left outer join SOURCE.result_pids pids on pids.id=r.id
|
left outer join ${stats_db_name}.result_pids pids on pids.id=r.id
|
||||||
left outer join SOURCE.result_cc_licence rln on rln.id=r.id
|
left outer join ${stats_db_name}.result_cc_licence rln on rln.id=r.id
|
||||||
left outer join SOURCE.result_projectcount rpc on rpc.id=r.id
|
left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id
|
||||||
left outer join SOURCE.result_fundercount rfc on rfc.id=r.id
|
left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
|
||||||
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
||||||
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
||||||
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, c.code, c.name;
|
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, c.code, c.name;
|
||||||
|
|
||||||
create table TARGET.result_affiliated_year stored as parquet as
|
create table ${observatory_db_name}.result_affiliated_year stored as parquet as
|
||||||
select
|
select
|
||||||
count(distinct r.id) as total,
|
count(distinct r.id) as total,
|
||||||
r.green,
|
r.green,
|
||||||
|
@ -53,20 +53,20 @@ select
|
||||||
rfc.count > 1 as multiple_funders,
|
rfc.count > 1 as multiple_funders,
|
||||||
r.type,
|
r.type,
|
||||||
r.year
|
r.year
|
||||||
from SOURCE.result r
|
from ${stats_db_name}.result r
|
||||||
join SOURCE.result_organization ro on ro.id=r.id
|
join ${stats_db_name}.result_organization ro on ro.id=r.id
|
||||||
join SOURCE.organization o on o.id=ro.organization
|
join ${stats_db_name}.organization o on o.id=ro.organization
|
||||||
join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
|
join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe'
|
||||||
left outer join SOURCE.result_licenses rl on rl.id=r.id
|
left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id
|
||||||
left outer join SOURCE.result_pids pids on pids.id=r.id
|
left outer join ${stats_db_name}.result_pids pids on pids.id=r.id
|
||||||
left outer join SOURCE.result_cc_licence rln on rln.id=r.id
|
left outer join ${stats_db_name}.result_cc_licence rln on rln.id=r.id
|
||||||
left outer join SOURCE.result_projectcount rpc on rpc.id=r.id
|
left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id
|
||||||
left outer join SOURCE.result_fundercount rfc on rfc.id=r.id
|
left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
|
||||||
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
||||||
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
||||||
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, r.year;
|
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, r.year;
|
||||||
|
|
||||||
create table TARGET.result_affiliated_year_country stored as parquet as
|
create table ${observatory_db_name}.result_affiliated_year_country stored as parquet as
|
||||||
select
|
select
|
||||||
count(distinct r.id) as total,
|
count(distinct r.id) as total,
|
||||||
r.green,
|
r.green,
|
||||||
|
@ -82,20 +82,20 @@ select
|
||||||
rfc.count > 1 as multiple_funders,
|
rfc.count > 1 as multiple_funders,
|
||||||
r.type,
|
r.type,
|
||||||
r.year, c.code as ccode, c.name as cname
|
r.year, c.code as ccode, c.name as cname
|
||||||
from SOURCE.result r
|
from ${stats_db_name}.result r
|
||||||
join SOURCE.result_organization ro on ro.id=r.id
|
join ${stats_db_name}.result_organization ro on ro.id=r.id
|
||||||
join SOURCE.organization o on o.id=ro.organization
|
join ${stats_db_name}.organization o on o.id=ro.organization
|
||||||
join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
|
join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe'
|
||||||
left outer join SOURCE.result_licenses rl on rl.id=r.id
|
left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id
|
||||||
left outer join SOURCE.result_pids pids on pids.id=r.id
|
left outer join ${stats_db_name}.result_pids pids on pids.id=r.id
|
||||||
left outer join SOURCE.result_cc_licence rln on rln.id=r.id
|
left outer join ${stats_db_name}.result_cc_licence rln on rln.id=r.id
|
||||||
left outer join SOURCE.result_projectcount rpc on rpc.id=r.id
|
left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id
|
||||||
left outer join SOURCE.result_fundercount rfc on rfc.id=r.id
|
left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
|
||||||
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
||||||
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
||||||
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, r.year, c.code, c.name;
|
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, r.year, c.code, c.name;
|
||||||
|
|
||||||
create table TARGET.result_affiliated_datasource stored as parquet as
|
create table ${observatory_db_name}.result_affiliated_datasource stored as parquet as
|
||||||
select
|
select
|
||||||
count(distinct r.id) as total,
|
count(distinct r.id) as total,
|
||||||
r.green,
|
r.green,
|
||||||
|
@ -111,22 +111,22 @@ select
|
||||||
rfc.count > 1 as multiple_funders,
|
rfc.count > 1 as multiple_funders,
|
||||||
r.type,
|
r.type,
|
||||||
d.name as dname
|
d.name as dname
|
||||||
from SOURCE.result r
|
from ${stats_db_name}.result r
|
||||||
join SOURCE.result_organization ro on ro.id=r.id
|
join ${stats_db_name}.result_organization ro on ro.id=r.id
|
||||||
join SOURCE.organization o on o.id=ro.organization
|
join ${stats_db_name}.organization o on o.id=ro.organization
|
||||||
join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
|
join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe'
|
||||||
left outer join SOURCE.result_datasources rd on rd.id=r.id
|
left outer join ${stats_db_name}.result_datasources rd on rd.id=r.id
|
||||||
left outer join SOURCE.datasource d on d.id=rd.datasource
|
left outer join ${stats_db_name}.datasource d on d.id=rd.datasource
|
||||||
left outer join SOURCE.result_licenses rl on rl.id=r.id
|
left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id
|
||||||
left outer join SOURCE.result_pids pids on pids.id=r.id
|
left outer join ${stats_db_name}.result_pids pids on pids.id=r.id
|
||||||
left outer join SOURCE.result_cc_licence rln on rln.id=r.id
|
left outer join ${stats_db_name}.result_cc_licence rln on rln.id=r.id
|
||||||
left outer join SOURCE.result_projectcount rpc on rpc.id=r.id
|
left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id
|
||||||
left outer join SOURCE.result_fundercount rfc on rfc.id=r.id
|
left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
|
||||||
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
||||||
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
||||||
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, d.name;
|
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, d.name;
|
||||||
|
|
||||||
create table TARGET.result_affiliated_datasource_country stored as parquet as
|
create table ${observatory_db_name}.result_affiliated_datasource_country stored as parquet as
|
||||||
select
|
select
|
||||||
count(distinct r.id) as total,
|
count(distinct r.id) as total,
|
||||||
r.green,
|
r.green,
|
||||||
|
@ -142,22 +142,22 @@ select
|
||||||
rfc.count > 1 as multiple_funders,
|
rfc.count > 1 as multiple_funders,
|
||||||
r.type,
|
r.type,
|
||||||
d.name as dname, c.code as ccode, c.name as cname
|
d.name as dname, c.code as ccode, c.name as cname
|
||||||
from SOURCE.result r
|
from ${stats_db_name}.result r
|
||||||
join SOURCE.result_organization ro on ro.id=r.id
|
join ${stats_db_name}.result_organization ro on ro.id=r.id
|
||||||
join SOURCE.organization o on o.id=ro.organization
|
join ${stats_db_name}.organization o on o.id=ro.organization
|
||||||
join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
|
join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe'
|
||||||
left outer join SOURCE.result_datasources rd on rd.id=r.id
|
left outer join ${stats_db_name}.result_datasources rd on rd.id=r.id
|
||||||
left outer join SOURCE.datasource d on d.id=rd.datasource
|
left outer join ${stats_db_name}.datasource d on d.id=rd.datasource
|
||||||
left outer join SOURCE.result_licenses rl on rl.id=r.id
|
left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id
|
||||||
left outer join SOURCE.result_pids pids on pids.id=r.id
|
left outer join ${stats_db_name}.result_pids pids on pids.id=r.id
|
||||||
left outer join SOURCE.result_cc_licence rln on rln.id=r.id
|
left outer join ${stats_db_name}.result_cc_licence rln on rln.id=r.id
|
||||||
left outer join SOURCE.result_projectcount rpc on rpc.id=r.id
|
left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id
|
||||||
left outer join SOURCE.result_fundercount rfc on rfc.id=r.id
|
left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
|
||||||
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
||||||
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
||||||
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, d.name, c.code, c.name;
|
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, d.name, c.code, c.name;
|
||||||
|
|
||||||
create table TARGET.result_affiliated_organization stored as parquet as
|
create table ${observatory_db_name}.result_affiliated_organization stored as parquet as
|
||||||
select
|
select
|
||||||
count(distinct r.id) as total,
|
count(distinct r.id) as total,
|
||||||
r.green,
|
r.green,
|
||||||
|
@ -173,20 +173,20 @@ select
|
||||||
rfc.count > 1 as multiple_funders,
|
rfc.count > 1 as multiple_funders,
|
||||||
r.type,
|
r.type,
|
||||||
o.name as oname
|
o.name as oname
|
||||||
from SOURCE.result r
|
from ${stats_db_name}.result r
|
||||||
join SOURCE.result_organization ro on ro.id=r.id
|
join ${stats_db_name}.result_organization ro on ro.id=r.id
|
||||||
join SOURCE.organization o on o.id=ro.organization
|
join ${stats_db_name}.organization o on o.id=ro.organization
|
||||||
join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
|
join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe'
|
||||||
left outer join SOURCE.result_licenses rl on rl.id=r.id
|
left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id
|
||||||
left outer join SOURCE.result_pids pids on pids.id=r.id
|
left outer join ${stats_db_name}.result_pids pids on pids.id=r.id
|
||||||
left outer join SOURCE.result_cc_licence rln on rln.id=r.id
|
left outer join ${stats_db_name}.result_cc_licence rln on rln.id=r.id
|
||||||
left outer join SOURCE.result_projectcount rpc on rpc.id=r.id
|
left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id
|
||||||
left outer join SOURCE.result_fundercount rfc on rfc.id=r.id
|
left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
|
||||||
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
||||||
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
||||||
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, o.name;
|
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, o.name;
|
||||||
|
|
||||||
create table TARGET.result_affiliated_organization_country stored as parquet as
|
create table ${observatory_db_name}.result_affiliated_organization_country stored as parquet as
|
||||||
select
|
select
|
||||||
count(distinct r.id) as total,
|
count(distinct r.id) as total,
|
||||||
r.green,
|
r.green,
|
||||||
|
@ -202,20 +202,20 @@ select
|
||||||
rfc.count > 1 as multiple_funders,
|
rfc.count > 1 as multiple_funders,
|
||||||
r.type,
|
r.type,
|
||||||
o.name as oname, c.code as ccode, c.name as cname
|
o.name as oname, c.code as ccode, c.name as cname
|
||||||
from SOURCE.result r
|
from ${stats_db_name}.result r
|
||||||
join SOURCE.result_organization ro on ro.id=r.id
|
join ${stats_db_name}.result_organization ro on ro.id=r.id
|
||||||
join SOURCE.organization o on o.id=ro.organization
|
join ${stats_db_name}.organization o on o.id=ro.organization
|
||||||
join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
|
join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe'
|
||||||
left outer join SOURCE.result_licenses rl on rl.id=r.id
|
left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id
|
||||||
left outer join SOURCE.result_pids pids on pids.id=r.id
|
left outer join ${stats_db_name}.result_pids pids on pids.id=r.id
|
||||||
left outer join SOURCE.result_cc_licence rln on rln.id=r.id
|
left outer join ${stats_db_name}.result_cc_licence rln on rln.id=r.id
|
||||||
left outer join SOURCE.result_projectcount rpc on rpc.id=r.id
|
left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id
|
||||||
left outer join SOURCE.result_fundercount rfc on rfc.id=r.id
|
left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
|
||||||
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
||||||
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
||||||
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, o.name, c.code, c.name;
|
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, o.name, c.code, c.name;
|
||||||
|
|
||||||
create table TARGET.result_affiliated_funder stored as parquet as
|
create table ${observatory_db_name}.result_affiliated_funder stored as parquet as
|
||||||
select
|
select
|
||||||
count(distinct r.id) as total,
|
count(distinct r.id) as total,
|
||||||
r.green,
|
r.green,
|
||||||
|
@ -231,22 +231,22 @@ select
|
||||||
rfc.count > 1 as multiple_funders,
|
rfc.count > 1 as multiple_funders,
|
||||||
r.type,
|
r.type,
|
||||||
p.funder as pfunder
|
p.funder as pfunder
|
||||||
from SOURCE.result r
|
from ${stats_db_name}.result r
|
||||||
join SOURCE.result_organization ro on ro.id=r.id
|
join ${stats_db_name}.result_organization ro on ro.id=r.id
|
||||||
join SOURCE.organization o on o.id=ro.organization
|
join ${stats_db_name}.organization o on o.id=ro.organization
|
||||||
join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
|
join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe'
|
||||||
join SOURCE.result_projects rp on rp.id=r.id
|
join ${stats_db_name}.result_projects rp on rp.id=r.id
|
||||||
join SOURCE.project p on p.id=rp.project
|
join ${stats_db_name}.project p on p.id=rp.project
|
||||||
left outer join SOURCE.result_licenses rl on rl.id=r.id
|
left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id
|
||||||
left outer join SOURCE.result_pids pids on pids.id=r.id
|
left outer join ${stats_db_name}.result_pids pids on pids.id=r.id
|
||||||
left outer join SOURCE.result_cc_licence rln on rln.id=r.id
|
left outer join ${stats_db_name}.result_cc_licence rln on rln.id=r.id
|
||||||
left outer join SOURCE.result_projectcount rpc on rpc.id=r.id
|
left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id
|
||||||
left outer join SOURCE.result_fundercount rfc on rfc.id=r.id
|
left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
|
||||||
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
||||||
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
||||||
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, p.funder;
|
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, p.funder;
|
||||||
|
|
||||||
create table TARGET.result_affiliated_funder_country stored as parquet as
|
create table ${observatory_db_name}.result_affiliated_funder_country stored as parquet as
|
||||||
select
|
select
|
||||||
count(distinct r.id) as total,
|
count(distinct r.id) as total,
|
||||||
r.green,
|
r.green,
|
||||||
|
@ -262,22 +262,22 @@ select
|
||||||
rfc.count > 1 as multiple_funders,
|
rfc.count > 1 as multiple_funders,
|
||||||
r.type,
|
r.type,
|
||||||
p.funder as pfunder, c.code as ccode, c.name as cname
|
p.funder as pfunder, c.code as ccode, c.name as cname
|
||||||
from SOURCE.result r
|
from ${stats_db_name}.result r
|
||||||
join SOURCE.result_organization ro on ro.id=r.id
|
join ${stats_db_name}.result_organization ro on ro.id=r.id
|
||||||
join SOURCE.organization o on o.id=ro.organization
|
join ${stats_db_name}.organization o on o.id=ro.organization
|
||||||
join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
|
join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe'
|
||||||
join SOURCE.result_projects rp on rp.id=r.id
|
join ${stats_db_name}.result_projects rp on rp.id=r.id
|
||||||
join SOURCE.project p on p.id=rp.project
|
join ${stats_db_name}.project p on p.id=rp.project
|
||||||
left outer join SOURCE.result_licenses rl on rl.id=r.id
|
left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id
|
||||||
left outer join SOURCE.result_pids pids on pids.id=r.id
|
left outer join ${stats_db_name}.result_pids pids on pids.id=r.id
|
||||||
left outer join SOURCE.result_cc_licence rln on rln.id=r.id
|
left outer join ${stats_db_name}.result_cc_licence rln on rln.id=r.id
|
||||||
left outer join SOURCE.result_projectcount rpc on rpc.id=r.id
|
left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id
|
||||||
left outer join SOURCE.result_fundercount rfc on rfc.id=r.id
|
left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
|
||||||
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
||||||
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
||||||
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, p.funder, c.code, c.name;
|
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, p.funder, c.code, c.name;
|
||||||
|
|
||||||
create table TARGET.result_deposited_country stored as parquet as
|
create table ${observatory_db_name}.result_deposited_country stored as parquet as
|
||||||
select
|
select
|
||||||
count(distinct r.id) as total,
|
count(distinct r.id) as total,
|
||||||
r.green,
|
r.green,
|
||||||
|
@ -293,22 +293,22 @@ select
|
||||||
rfc.count > 1 as multiple_funders,
|
rfc.count > 1 as multiple_funders,
|
||||||
r.type,
|
r.type,
|
||||||
c.code as ccode, c.name as cname
|
c.code as ccode, c.name as cname
|
||||||
from SOURCE.result r
|
from ${stats_db_name}.result r
|
||||||
join SOURCE.result_datasources rd on rd.id=r.id
|
join ${stats_db_name}.result_datasources rd on rd.id=r.id
|
||||||
join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
|
join ${stats_db_name}.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
|
||||||
join SOURCE.datasource_organizations dor on dor.id=d.id
|
join ${stats_db_name}.datasource_organizations dor on dor.id=d.id
|
||||||
join SOURCE.organization o on o.id=dor.organization
|
join ${stats_db_name}.organization o on o.id=dor.organization
|
||||||
join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
|
join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe'
|
||||||
left outer join SOURCE.result_licenses rl on rl.id=r.id
|
left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id
|
||||||
left outer join SOURCE.result_pids pids on pids.id=r.id
|
left outer join ${stats_db_name}.result_pids pids on pids.id=r.id
|
||||||
left outer join SOURCE.result_cc_licence rln on rln.id=r.id
|
left outer join ${stats_db_name}.result_cc_licence rln on rln.id=r.id
|
||||||
left outer join SOURCE.result_projectcount rpc on rpc.id=r.id
|
left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id
|
||||||
left outer join SOURCE.result_fundercount rfc on rfc.id=r.id
|
left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
|
||||||
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
||||||
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
||||||
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, c.code, c.name;
|
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, c.code, c.name;
|
||||||
|
|
||||||
create table TARGET.result_deposited_year stored as parquet as
|
create table ${observatory_db_name}.result_deposited_year stored as parquet as
|
||||||
select
|
select
|
||||||
count(distinct r.id) as total,
|
count(distinct r.id) as total,
|
||||||
r.green,
|
r.green,
|
||||||
|
@ -324,22 +324,22 @@ select
|
||||||
rfc.count > 1 as multiple_funders,
|
rfc.count > 1 as multiple_funders,
|
||||||
r.type,
|
r.type,
|
||||||
r.year
|
r.year
|
||||||
from SOURCE.result r
|
from ${stats_db_name}.result r
|
||||||
join SOURCE.result_datasources rd on rd.id=r.id
|
join ${stats_db_name}.result_datasources rd on rd.id=r.id
|
||||||
join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
|
join ${stats_db_name}.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
|
||||||
join SOURCE.datasource_organizations dor on dor.id=d.id
|
join ${stats_db_name}.datasource_organizations dor on dor.id=d.id
|
||||||
join SOURCE.organization o on o.id=dor.organization
|
join ${stats_db_name}.organization o on o.id=dor.organization
|
||||||
join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
|
join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe'
|
||||||
left outer join SOURCE.result_licenses rl on rl.id=r.id
|
left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id
|
||||||
left outer join SOURCE.result_pids pids on pids.id=r.id
|
left outer join ${stats_db_name}.result_pids pids on pids.id=r.id
|
||||||
left outer join SOURCE.result_cc_licence rln on rln.id=r.id
|
left outer join ${stats_db_name}.result_cc_licence rln on rln.id=r.id
|
||||||
left outer join SOURCE.result_projectcount rpc on rpc.id=r.id
|
left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id
|
||||||
left outer join SOURCE.result_fundercount rfc on rfc.id=r.id
|
left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
|
||||||
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
||||||
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
||||||
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, r.year;
|
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, r.year;
|
||||||
|
|
||||||
create table TARGET.result_deposited_year_country stored as parquet as
|
create table ${observatory_db_name}.result_deposited_year_country stored as parquet as
|
||||||
select
|
select
|
||||||
count(distinct r.id) as total,
|
count(distinct r.id) as total,
|
||||||
r.green,
|
r.green,
|
||||||
|
@ -355,22 +355,22 @@ select
|
||||||
rfc.count > 1 as multiple_funders,
|
rfc.count > 1 as multiple_funders,
|
||||||
r.type,
|
r.type,
|
||||||
r.year, c.code as ccode, c.name as cname
|
r.year, c.code as ccode, c.name as cname
|
||||||
from SOURCE.result r
|
from ${stats_db_name}.result r
|
||||||
join SOURCE.result_datasources rd on rd.id=r.id
|
join ${stats_db_name}.result_datasources rd on rd.id=r.id
|
||||||
join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
|
join ${stats_db_name}.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
|
||||||
join SOURCE.datasource_organizations dor on dor.id=d.id
|
join ${stats_db_name}.datasource_organizations dor on dor.id=d.id
|
||||||
join SOURCE.organization o on o.id=dor.organization
|
join ${stats_db_name}.organization o on o.id=dor.organization
|
||||||
join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
|
join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe'
|
||||||
left outer join SOURCE.result_licenses rl on rl.id=r.id
|
left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id
|
||||||
left outer join SOURCE.result_pids pids on pids.id=r.id
|
left outer join ${stats_db_name}.result_pids pids on pids.id=r.id
|
||||||
left outer join SOURCE.result_cc_licence rln on rln.id=r.id
|
left outer join ${stats_db_name}.result_cc_licence rln on rln.id=r.id
|
||||||
left outer join SOURCE.result_projectcount rpc on rpc.id=r.id
|
left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id
|
||||||
left outer join SOURCE.result_fundercount rfc on rfc.id=r.id
|
left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
|
||||||
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
||||||
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
||||||
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, r.year, c.code, c.name;
|
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, r.year, c.code, c.name;
|
||||||
|
|
||||||
create table TARGET.result_deposited_datasource stored as parquet as
|
create table ${observatory_db_name}.result_deposited_datasource stored as parquet as
|
||||||
select
|
select
|
||||||
count(distinct r.id) as total,
|
count(distinct r.id) as total,
|
||||||
r.green,
|
r.green,
|
||||||
|
@ -386,22 +386,22 @@ select
|
||||||
rfc.count > 1 as multiple_funders,
|
rfc.count > 1 as multiple_funders,
|
||||||
r.type,
|
r.type,
|
||||||
d.name as dname
|
d.name as dname
|
||||||
from SOURCE.result r
|
from ${stats_db_name}.result r
|
||||||
join SOURCE.result_datasources rd on rd.id=r.id
|
join ${stats_db_name}.result_datasources rd on rd.id=r.id
|
||||||
join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
|
join ${stats_db_name}.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
|
||||||
join SOURCE.datasource_organizations dor on dor.id=d.id
|
join ${stats_db_name}.datasource_organizations dor on dor.id=d.id
|
||||||
join SOURCE.organization o on o.id=dor.organization
|
join ${stats_db_name}.organization o on o.id=dor.organization
|
||||||
join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
|
join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe'
|
||||||
left outer join SOURCE.result_licenses rl on rl.id=r.id
|
left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id
|
||||||
left outer join SOURCE.result_pids pids on pids.id=r.id
|
left outer join ${stats_db_name}.result_pids pids on pids.id=r.id
|
||||||
left outer join SOURCE.result_cc_licence rln on rln.id=r.id
|
left outer join ${stats_db_name}.result_cc_licence rln on rln.id=r.id
|
||||||
left outer join SOURCE.result_projectcount rpc on rpc.id=r.id
|
left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id
|
||||||
left outer join SOURCE.result_fundercount rfc on rfc.id=r.id
|
left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
|
||||||
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
||||||
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
||||||
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, d.name;
|
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, d.name;
|
||||||
|
|
||||||
create table TARGET.result_deposited_datasource_country stored as parquet as
|
create table ${observatory_db_name}.result_deposited_datasource_country stored as parquet as
|
||||||
select
|
select
|
||||||
count(distinct r.id) as total,
|
count(distinct r.id) as total,
|
||||||
r.green,
|
r.green,
|
||||||
|
@ -417,22 +417,22 @@ select
|
||||||
rfc.count > 1 as multiple_funders,
|
rfc.count > 1 as multiple_funders,
|
||||||
r.type,
|
r.type,
|
||||||
d.name as dname, c.code as ccode, c.name as cname
|
d.name as dname, c.code as ccode, c.name as cname
|
||||||
from SOURCE.result r
|
from ${stats_db_name}.result r
|
||||||
join SOURCE.result_datasources rd on rd.id=r.id
|
join ${stats_db_name}.result_datasources rd on rd.id=r.id
|
||||||
join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
|
join ${stats_db_name}.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
|
||||||
join SOURCE.datasource_organizations dor on dor.id=d.id
|
join ${stats_db_name}.datasource_organizations dor on dor.id=d.id
|
||||||
join SOURCE.organization o on o.id=dor.organization
|
join ${stats_db_name}.organization o on o.id=dor.organization
|
||||||
join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
|
join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe'
|
||||||
left outer join SOURCE.result_licenses rl on rl.id=r.id
|
left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id
|
||||||
left outer join SOURCE.result_pids pids on pids.id=r.id
|
left outer join ${stats_db_name}.result_pids pids on pids.id=r.id
|
||||||
left outer join SOURCE.result_cc_licence rln on rln.id=r.id
|
left outer join ${stats_db_name}.result_cc_licence rln on rln.id=r.id
|
||||||
left outer join SOURCE.result_projectcount rpc on rpc.id=r.id
|
left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id
|
||||||
left outer join SOURCE.result_fundercount rfc on rfc.id=r.id
|
left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
|
||||||
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
||||||
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
||||||
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, d.name, c.code, c.name;
|
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, d.name, c.code, c.name;
|
||||||
|
|
||||||
create table TARGET.result_deposited_organization stored as parquet as
|
create table ${observatory_db_name}.result_deposited_organization stored as parquet as
|
||||||
select
|
select
|
||||||
count(distinct r.id) as total,
|
count(distinct r.id) as total,
|
||||||
r.green,
|
r.green,
|
||||||
|
@ -448,22 +448,22 @@ select
|
||||||
rfc.count > 1 as multiple_funders,
|
rfc.count > 1 as multiple_funders,
|
||||||
r.type,
|
r.type,
|
||||||
o.name as oname
|
o.name as oname
|
||||||
from SOURCE.result r
|
from ${stats_db_name}.result r
|
||||||
join SOURCE.result_datasources rd on rd.id=r.id
|
join ${stats_db_name}.result_datasources rd on rd.id=r.id
|
||||||
join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
|
join ${stats_db_name}.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
|
||||||
join SOURCE.datasource_organizations dor on dor.id=d.id
|
join ${stats_db_name}.datasource_organizations dor on dor.id=d.id
|
||||||
join SOURCE.organization o on o.id=dor.organization
|
join ${stats_db_name}.organization o on o.id=dor.organization
|
||||||
join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
|
join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe'
|
||||||
left outer join SOURCE.result_licenses rl on rl.id=r.id
|
left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id
|
||||||
left outer join SOURCE.result_pids pids on pids.id=r.id
|
left outer join ${stats_db_name}.result_pids pids on pids.id=r.id
|
||||||
left outer join SOURCE.result_cc_licence rln on rln.id=r.id
|
left outer join ${stats_db_name}.result_cc_licence rln on rln.id=r.id
|
||||||
left outer join SOURCE.result_projectcount rpc on rpc.id=r.id
|
left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id
|
||||||
left outer join SOURCE.result_fundercount rfc on rfc.id=r.id
|
left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
|
||||||
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
||||||
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
||||||
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, o.name;
|
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, o.name;
|
||||||
|
|
||||||
create table TARGET.result_deposited_organization_country stored as parquet as
|
create table ${observatory_db_name}.result_deposited_organization_country stored as parquet as
|
||||||
select
|
select
|
||||||
count(distinct r.id) as total,
|
count(distinct r.id) as total,
|
||||||
r.green,
|
r.green,
|
||||||
|
@ -479,22 +479,22 @@ select
|
||||||
rfc.count > 1 as multiple_funders,
|
rfc.count > 1 as multiple_funders,
|
||||||
r.type,
|
r.type,
|
||||||
o.name as oname, c.code as ccode, c.name as cname
|
o.name as oname, c.code as ccode, c.name as cname
|
||||||
from SOURCE.result r
|
from ${stats_db_name}.result r
|
||||||
join SOURCE.result_datasources rd on rd.id=r.id
|
join ${stats_db_name}.result_datasources rd on rd.id=r.id
|
||||||
join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
|
join ${stats_db_name}.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
|
||||||
join SOURCE.datasource_organizations dor on dor.id=d.id
|
join ${stats_db_name}.datasource_organizations dor on dor.id=d.id
|
||||||
join SOURCE.organization o on o.id=dor.organization
|
join ${stats_db_name}.organization o on o.id=dor.organization
|
||||||
join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
|
join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe'
|
||||||
left outer join SOURCE.result_licenses rl on rl.id=r.id
|
left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id
|
||||||
left outer join SOURCE.result_pids pids on pids.id=r.id
|
left outer join ${stats_db_name}.result_pids pids on pids.id=r.id
|
||||||
left outer join SOURCE.result_cc_licence rln on rln.id=r.id
|
left outer join ${stats_db_name}.result_cc_licence rln on rln.id=r.id
|
||||||
left outer join SOURCE.result_projectcount rpc on rpc.id=r.id
|
left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id
|
||||||
left outer join SOURCE.result_fundercount rfc on rfc.id=r.id
|
left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
|
||||||
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
||||||
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
||||||
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, o.name, c.code, c.name;
|
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, o.name, c.code, c.name;
|
||||||
|
|
||||||
create table TARGET.result_deposited_funder stored as parquet as
|
create table ${observatory_db_name}.result_deposited_funder stored as parquet as
|
||||||
select
|
select
|
||||||
count(distinct r.id) as total,
|
count(distinct r.id) as total,
|
||||||
r.green,
|
r.green,
|
||||||
|
@ -510,24 +510,24 @@ select
|
||||||
rfc.count > 1 as multiple_funders,
|
rfc.count > 1 as multiple_funders,
|
||||||
r.type,
|
r.type,
|
||||||
p.funder as pfunder
|
p.funder as pfunder
|
||||||
from SOURCE.result r
|
from ${stats_db_name}.result r
|
||||||
join SOURCE.result_datasources rd on rd.id=r.id
|
join ${stats_db_name}.result_datasources rd on rd.id=r.id
|
||||||
join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
|
join ${stats_db_name}.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
|
||||||
join SOURCE.datasource_organizations dor on dor.id=d.id
|
join ${stats_db_name}.datasource_organizations dor on dor.id=d.id
|
||||||
join SOURCE.organization o on o.id=dor.organization
|
join ${stats_db_name}.organization o on o.id=dor.organization
|
||||||
join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
|
join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe'
|
||||||
join SOURCE.result_projects rp on rp.id=r.id
|
join ${stats_db_name}.result_projects rp on rp.id=r.id
|
||||||
join SOURCE.project p on p.id=rp.project
|
join ${stats_db_name}.project p on p.id=rp.project
|
||||||
left outer join SOURCE.result_licenses rl on rl.id=r.id
|
left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id
|
||||||
left outer join SOURCE.result_pids pids on pids.id=r.id
|
left outer join ${stats_db_name}.result_pids pids on pids.id=r.id
|
||||||
left outer join SOURCE.result_cc_licence rln on rln.id=r.id
|
left outer join ${stats_db_name}.result_cc_licence rln on rln.id=r.id
|
||||||
left outer join SOURCE.result_projectcount rpc on rpc.id=r.id
|
left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id
|
||||||
left outer join SOURCE.result_fundercount rfc on rfc.id=r.id
|
left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
|
||||||
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
||||||
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
||||||
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, p.funder;
|
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, p.funder;
|
||||||
|
|
||||||
create table TARGET.result_deposited_funder_country stored as parquet as
|
create table ${observatory_db_name}.result_deposited_funder_country stored as parquet as
|
||||||
select
|
select
|
||||||
count(distinct r.id) as total,
|
count(distinct r.id) as total,
|
||||||
r.green,
|
r.green,
|
||||||
|
@ -543,38 +543,19 @@ select
|
||||||
rfc.count > 1 as multiple_funders,
|
rfc.count > 1 as multiple_funders,
|
||||||
r.type,
|
r.type,
|
||||||
p.funder as pfunder, c.code as ccode, c.name as cname
|
p.funder as pfunder, c.code as ccode, c.name as cname
|
||||||
from SOURCE.result r
|
from ${stats_db_name}.result r
|
||||||
join SOURCE.result_datasources rd on rd.id=r.id
|
join ${stats_db_name}.result_datasources rd on rd.id=r.id
|
||||||
join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
|
join ${stats_db_name}.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
|
||||||
join SOURCE.datasource_organizations dor on dor.id=d.id
|
join ${stats_db_name}.datasource_organizations dor on dor.id=d.id
|
||||||
join SOURCE.organization o on o.id=dor.organization
|
join ${stats_db_name}.organization o on o.id=dor.organization
|
||||||
join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
|
join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe'
|
||||||
join SOURCE.result_projects rp on rp.id=r.id
|
join ${stats_db_name}.result_projects rp on rp.id=r.id
|
||||||
join SOURCE.project p on p.id=rp.project
|
join ${stats_db_name}.project p on p.id=rp.project
|
||||||
left outer join SOURCE.result_licenses rl on rl.id=r.id
|
left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id
|
||||||
left outer join SOURCE.result_pids pids on pids.id=r.id
|
left outer join ${stats_db_name}.result_pids pids on pids.id=r.id
|
||||||
left outer join SOURCE.result_cc_licence rln on rln.id=r.id
|
left outer join ${stats_db_name}.result_cc_licence rln on rln.id=r.id
|
||||||
left outer join SOURCE.result_projectcount rpc on rpc.id=r.id
|
left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id
|
||||||
left outer join SOURCE.result_fundercount rfc on rfc.id=r.id
|
left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
|
||||||
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
||||||
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
||||||
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, p.funder, c.code, c.name;
|
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, p.funder, c.code, c.name;
|
||||||
|
|
||||||
-- compute stats TARGET.result_affiliated_country;
|
|
||||||
-- compute stats TARGET.result_affiliated_year;
|
|
||||||
-- compute stats TARGET.result_affiliated_year_country;
|
|
||||||
-- compute stats TARGET.result_affiliated_datasource;
|
|
||||||
-- compute stats TARGET.result_affiliated_datasource_country;
|
|
||||||
-- compute stats TARGET.result_affiliated_organization;
|
|
||||||
-- compute stats TARGET.result_affiliated_organization_country;
|
|
||||||
-- compute stats TARGET.result_affiliated_funder;
|
|
||||||
-- compute stats TARGET.result_affiliated_funder_country;
|
|
||||||
-- compute stats TARGET.result_deposited_country;
|
|
||||||
-- compute stats TARGET.result_deposited_year;
|
|
||||||
-- compute stats TARGET.result_deposited_year_country;
|
|
||||||
-- compute stats TARGET.result_deposited_datasource;
|
|
||||||
-- compute stats TARGET.result_deposited_datasource_country;
|
|
||||||
-- compute stats TARGET.result_deposited_organization;
|
|
||||||
-- compute stats TARGET.result_deposited_organization_country;
|
|
||||||
-- compute stats TARGET.result_deposited_funder;
|
|
||||||
-- compute stats TARGET.result_deposited_funder_country;
|
|
|
@ -326,20 +326,44 @@
|
||||||
<argument>${wf:appPath()}/scripts/step20-createMonitorDB.sql</argument>
|
<argument>${wf:appPath()}/scripts/step20-createMonitorDB.sql</argument>
|
||||||
<file>monitor.sh</file>
|
<file>monitor.sh</file>
|
||||||
</shell>
|
</shell>
|
||||||
|
<ok to="step21-createObservatoryDB-pre"/>
|
||||||
|
<error to="Kill"/>
|
||||||
|
</action>
|
||||||
|
|
||||||
|
<action name="step21-createObservatoryDB-pre">
|
||||||
|
<shell xmlns="uri:oozie:shell-action:0.1">
|
||||||
|
<job-tracker>${jobTracker}</job-tracker>
|
||||||
|
<name-node>${nameNode}</name-node>
|
||||||
|
<exec>observatory-pre.sh</exec>
|
||||||
|
<argument>${stats_db_name}</argument>
|
||||||
|
<argument>${observatory_db_name}</argument>
|
||||||
|
<argument>${observatory_db_shadow_name}</argument>
|
||||||
|
<file>observatory-pre.sh</file>
|
||||||
|
</shell>
|
||||||
<ok to="step21-createObservatoryDB"/>
|
<ok to="step21-createObservatoryDB"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
</action>
|
</action>
|
||||||
|
|
||||||
<action name="step21-createObservatoryDB">
|
<action name="step21-createObservatoryDB">
|
||||||
|
<hive2 xmlns="uri:oozie:hive2-action:0.1">
|
||||||
|
<jdbc-url>${hive_jdbc_url}</jdbc-url>
|
||||||
|
<script>scripts/step21-createObservatoryDB.sql</script>
|
||||||
|
<param>stats_db_name=${stats_db_name}</param>
|
||||||
|
<param>observatory_db_name=${observatory_db_name}</param>
|
||||||
|
</hive2>
|
||||||
|
<ok to="step21-createObservatoryDB-post"/>
|
||||||
|
<error to="Kill"/>
|
||||||
|
</action>
|
||||||
|
|
||||||
|
<action name="step21-createObservatoryDB-post">
|
||||||
<shell xmlns="uri:oozie:shell-action:0.1">
|
<shell xmlns="uri:oozie:shell-action:0.1">
|
||||||
<job-tracker>${jobTracker}</job-tracker>
|
<job-tracker>${jobTracker}</job-tracker>
|
||||||
<name-node>${nameNode}</name-node>
|
<name-node>${nameNode}</name-node>
|
||||||
<exec>observatory.sh</exec>
|
<exec>observatory-post.sh</exec>
|
||||||
<argument>${stats_db_name}</argument>
|
<argument>${stats_db_name}</argument>
|
||||||
<argument>${observatory_db_name}</argument>
|
<argument>${observatory_db_name}</argument>
|
||||||
<argument>${observatory_db_shadow_name}</argument>
|
<argument>${observatory_db_shadow_name}</argument>
|
||||||
<argument>${wf:appPath()}/scripts/step21-createObservatoryDB.sql</argument>
|
<file>observatory-post.sh</file>
|
||||||
<file>observatory.sh</file>
|
|
||||||
</shell>
|
</shell>
|
||||||
<ok to="Step22"/>
|
<ok to="Step22"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
|
|
Loading…
Reference in New Issue