Created Hive action for observatory queries

This commit is contained in:
Antonis Lempesis 2021-09-21 03:07:58 +03:00
parent 8b681dcf1b
commit 421d55265d
4 changed files with 258 additions and 246 deletions

View File

@ -9,16 +9,7 @@ fi
export SOURCE=$1 export SOURCE=$1
export TARGET=$2 export TARGET=$2
export SHADOW=$3 export SHADOW=$3
export SCRIPT_PATH=$4
echo "Getting file from " $4
hdfs dfs -copyToLocal $4
echo "Creating observatory database"
impala-shell -q "drop database if exists ${TARGET} cascade"
impala-shell -q "create database if not exists ${TARGET}"
impala-shell -d ${SOURCE} -q "show tables" --delimited | sed "s/\(.*\)/create view ${TARGET}.\1 as select * from ${SOURCE}.\1;/" | impala-shell -f -
cat step21-createObservatoryDB.sql | sed s/SOURCE/$1/g | sed s/TARGET/$2/g1 | hive -f -
impala-shell -q "invalidate metadata;" impala-shell -q "invalidate metadata;"
impala-shell -d ${TARGET} -q "show tables" --delimited | sed "s/\(.*\)/compute stats ${TARGET}.\1;/" | impala-shell -f - impala-shell -d ${TARGET} -q "show tables" --delimited | sed "s/\(.*\)/compute stats ${TARGET}.\1;/" | impala-shell -f -
echo "Impala shell finished" echo "Impala shell finished"

View File

@ -0,0 +1,16 @@
# Drops and recreates the TARGET database in Impala, then fills it with one
# view per table of the SOURCE database.
#
# Usage: <script> SOURCE TARGET [SHADOW]
#   SOURCE  database whose tables are exposed as views
#   TARGET  database that is dropped (cascade) and recreated to hold the views
#   SHADOW  exported for child processes; not referenced in the lines shown here

# Per-user egg cache location, reached through a per-user symlink under /tmp.
# NOTE(review): presumably consumed by impala-shell's Python runtime — confirm.
export PYTHON_EGG_CACHE="/home/$(whoami)/.python-eggs"
export link_folder="/tmp/impala-shell-python-egg-cache-$(whoami)"
if ! [ -L "$link_folder" ]
then
    # Whatever sits at the link path is not a symlink: remove it and link anew.
    rm -Rf "$link_folder"
    ln -sfn "${PYTHON_EGG_CACHE}${link_folder}" "${link_folder}"
fi

export SOURCE="$1"
export TARGET="$2"
export SHADOW="$3"

# Both database names are mandatory. With an empty SOURCE the later
# `impala-shell -d` would swallow `-q` as its database argument, and an empty
# TARGET produces malformed DROP/CREATE statements, so stop before touching
# the cluster.
if [ -z "$SOURCE" ] || [ -z "$TARGET" ]
then
    echo "Usage: $0 SOURCE TARGET [SHADOW]" >&2
    exit 1
fi

echo "Creating observatory database"
impala-shell -q "drop database if exists ${TARGET} cascade"
impala-shell -q "create database if not exists ${TARGET}"

# List SOURCE's tables, turn each name into a `create view TARGET.<name> as
# select * from SOURCE.<name>;` statement, and feed the statements back to
# impala-shell on stdin.
impala-shell -d "${SOURCE}" -q "show tables" --delimited \
    | sed "s/\(.*\)/create view ${TARGET}.\1 as select * from ${SOURCE}.\1;/" \
    | impala-shell -f -

View File

@ -1,14 +1,14 @@
create table TARGET.result_cc_licence stored as parquet as create table ${observatory_db_name}.result_cc_licence stored as parquet as
select r.id, coalesce(rln.count, 0) > 0 as cc_licence select r.id, coalesce(rln.count, 0) > 0 as cc_licence
from SOURCE.result r from ${stats_db_name}.result r
left outer join ( left outer join (
select rl.id, sum(case when lower(rln.normalized) like 'cc-%' then 1 else 0 end) as count select rl.id, sum(case when lower(rln.normalized) like 'cc-%' then 1 else 0 end) as count
from SOURCE.result_licenses rl from ${stats_db_name}.result_licenses rl
left outer join SOURCE.licenses_normalized rln on rl.type=rln.license left outer join ${stats_db_name}.licenses_normalized rln on rl.type=rln.license
group by rl.id group by rl.id
) rln on rln.id=r.id; ) rln on rln.id=r.id;
create table TARGET.result_affiliated_country stored as parquet as create table ${observatory_db_name}.result_affiliated_country stored as parquet as
select select
count(distinct r.id) as total, count(distinct r.id) as total,
r.green, r.green,
@ -24,20 +24,20 @@ select
rfc.count > 1 as multiple_funders, rfc.count > 1 as multiple_funders,
r.type, r.type,
c.code as ccode, c.name as cname c.code as ccode, c.name as cname
from SOURCE.result r from ${stats_db_name}.result r
join SOURCE.result_organization ro on ro.id=r.id join ${stats_db_name}.result_organization ro on ro.id=r.id
join SOURCE.organization o on o.id=ro.organization join ${stats_db_name}.organization o on o.id=ro.organization
join SOURCE.country c on c.code=o.country and c.continent_name='Europe' join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe'
left outer join SOURCE.result_licenses rl on rl.id=r.id left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id
left outer join SOURCE.result_pids pids on pids.id=r.id left outer join ${stats_db_name}.result_pids pids on pids.id=r.id
left outer join SOURCE.result_cc_licence rln on rln.id=r.id left outer join ${stats_db_name}.result_cc_licence rln on rln.id=r.id
left outer join SOURCE.result_projectcount rpc on rpc.id=r.id left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id
left outer join SOURCE.result_fundercount rfc on rfc.id=r.id left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, c.code, c.name; cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, c.code, c.name;
create table TARGET.result_affiliated_year stored as parquet as create table ${observatory_db_name}.result_affiliated_year stored as parquet as
select select
count(distinct r.id) as total, count(distinct r.id) as total,
r.green, r.green,
@ -53,20 +53,20 @@ select
rfc.count > 1 as multiple_funders, rfc.count > 1 as multiple_funders,
r.type, r.type,
r.year r.year
from SOURCE.result r from ${stats_db_name}.result r
join SOURCE.result_organization ro on ro.id=r.id join ${stats_db_name}.result_organization ro on ro.id=r.id
join SOURCE.organization o on o.id=ro.organization join ${stats_db_name}.organization o on o.id=ro.organization
join SOURCE.country c on c.code=o.country and c.continent_name='Europe' join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe'
left outer join SOURCE.result_licenses rl on rl.id=r.id left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id
left outer join SOURCE.result_pids pids on pids.id=r.id left outer join ${stats_db_name}.result_pids pids on pids.id=r.id
left outer join SOURCE.result_cc_licence rln on rln.id=r.id left outer join ${stats_db_name}.result_cc_licence rln on rln.id=r.id
left outer join SOURCE.result_projectcount rpc on rpc.id=r.id left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id
left outer join SOURCE.result_fundercount rfc on rfc.id=r.id left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, r.year; cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, r.year;
create table TARGET.result_affiliated_year_country stored as parquet as create table ${observatory_db_name}.result_affiliated_year_country stored as parquet as
select select
count(distinct r.id) as total, count(distinct r.id) as total,
r.green, r.green,
@ -82,20 +82,20 @@ select
rfc.count > 1 as multiple_funders, rfc.count > 1 as multiple_funders,
r.type, r.type,
r.year, c.code as ccode, c.name as cname r.year, c.code as ccode, c.name as cname
from SOURCE.result r from ${stats_db_name}.result r
join SOURCE.result_organization ro on ro.id=r.id join ${stats_db_name}.result_organization ro on ro.id=r.id
join SOURCE.organization o on o.id=ro.organization join ${stats_db_name}.organization o on o.id=ro.organization
join SOURCE.country c on c.code=o.country and c.continent_name='Europe' join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe'
left outer join SOURCE.result_licenses rl on rl.id=r.id left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id
left outer join SOURCE.result_pids pids on pids.id=r.id left outer join ${stats_db_name}.result_pids pids on pids.id=r.id
left outer join SOURCE.result_cc_licence rln on rln.id=r.id left outer join ${stats_db_name}.result_cc_licence rln on rln.id=r.id
left outer join SOURCE.result_projectcount rpc on rpc.id=r.id left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id
left outer join SOURCE.result_fundercount rfc on rfc.id=r.id left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, r.year, c.code, c.name; cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, r.year, c.code, c.name;
create table TARGET.result_affiliated_datasource stored as parquet as create table ${observatory_db_name}.result_affiliated_datasource stored as parquet as
select select
count(distinct r.id) as total, count(distinct r.id) as total,
r.green, r.green,
@ -111,22 +111,22 @@ select
rfc.count > 1 as multiple_funders, rfc.count > 1 as multiple_funders,
r.type, r.type,
d.name as dname d.name as dname
from SOURCE.result r from ${stats_db_name}.result r
join SOURCE.result_organization ro on ro.id=r.id join ${stats_db_name}.result_organization ro on ro.id=r.id
join SOURCE.organization o on o.id=ro.organization join ${stats_db_name}.organization o on o.id=ro.organization
join SOURCE.country c on c.code=o.country and c.continent_name='Europe' join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe'
left outer join SOURCE.result_datasources rd on rd.id=r.id left outer join ${stats_db_name}.result_datasources rd on rd.id=r.id
left outer join SOURCE.datasource d on d.id=rd.datasource left outer join ${stats_db_name}.datasource d on d.id=rd.datasource
left outer join SOURCE.result_licenses rl on rl.id=r.id left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id
left outer join SOURCE.result_pids pids on pids.id=r.id left outer join ${stats_db_name}.result_pids pids on pids.id=r.id
left outer join SOURCE.result_cc_licence rln on rln.id=r.id left outer join ${stats_db_name}.result_cc_licence rln on rln.id=r.id
left outer join SOURCE.result_projectcount rpc on rpc.id=r.id left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id
left outer join SOURCE.result_fundercount rfc on rfc.id=r.id left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, d.name; cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, d.name;
create table TARGET.result_affiliated_datasource_country stored as parquet as create table ${observatory_db_name}.result_affiliated_datasource_country stored as parquet as
select select
count(distinct r.id) as total, count(distinct r.id) as total,
r.green, r.green,
@ -142,22 +142,22 @@ select
rfc.count > 1 as multiple_funders, rfc.count > 1 as multiple_funders,
r.type, r.type,
d.name as dname, c.code as ccode, c.name as cname d.name as dname, c.code as ccode, c.name as cname
from SOURCE.result r from ${stats_db_name}.result r
join SOURCE.result_organization ro on ro.id=r.id join ${stats_db_name}.result_organization ro on ro.id=r.id
join SOURCE.organization o on o.id=ro.organization join ${stats_db_name}.organization o on o.id=ro.organization
join SOURCE.country c on c.code=o.country and c.continent_name='Europe' join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe'
left outer join SOURCE.result_datasources rd on rd.id=r.id left outer join ${stats_db_name}.result_datasources rd on rd.id=r.id
left outer join SOURCE.datasource d on d.id=rd.datasource left outer join ${stats_db_name}.datasource d on d.id=rd.datasource
left outer join SOURCE.result_licenses rl on rl.id=r.id left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id
left outer join SOURCE.result_pids pids on pids.id=r.id left outer join ${stats_db_name}.result_pids pids on pids.id=r.id
left outer join SOURCE.result_cc_licence rln on rln.id=r.id left outer join ${stats_db_name}.result_cc_licence rln on rln.id=r.id
left outer join SOURCE.result_projectcount rpc on rpc.id=r.id left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id
left outer join SOURCE.result_fundercount rfc on rfc.id=r.id left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, d.name, c.code, c.name; cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, d.name, c.code, c.name;
create table TARGET.result_affiliated_organization stored as parquet as create table ${observatory_db_name}.result_affiliated_organization stored as parquet as
select select
count(distinct r.id) as total, count(distinct r.id) as total,
r.green, r.green,
@ -173,20 +173,20 @@ select
rfc.count > 1 as multiple_funders, rfc.count > 1 as multiple_funders,
r.type, r.type,
o.name as oname o.name as oname
from SOURCE.result r from ${stats_db_name}.result r
join SOURCE.result_organization ro on ro.id=r.id join ${stats_db_name}.result_organization ro on ro.id=r.id
join SOURCE.organization o on o.id=ro.organization join ${stats_db_name}.organization o on o.id=ro.organization
join SOURCE.country c on c.code=o.country and c.continent_name='Europe' join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe'
left outer join SOURCE.result_licenses rl on rl.id=r.id left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id
left outer join SOURCE.result_pids pids on pids.id=r.id left outer join ${stats_db_name}.result_pids pids on pids.id=r.id
left outer join SOURCE.result_cc_licence rln on rln.id=r.id left outer join ${stats_db_name}.result_cc_licence rln on rln.id=r.id
left outer join SOURCE.result_projectcount rpc on rpc.id=r.id left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id
left outer join SOURCE.result_fundercount rfc on rfc.id=r.id left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, o.name; cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, o.name;
create table TARGET.result_affiliated_organization_country stored as parquet as create table ${observatory_db_name}.result_affiliated_organization_country stored as parquet as
select select
count(distinct r.id) as total, count(distinct r.id) as total,
r.green, r.green,
@ -202,20 +202,20 @@ select
rfc.count > 1 as multiple_funders, rfc.count > 1 as multiple_funders,
r.type, r.type,
o.name as oname, c.code as ccode, c.name as cname o.name as oname, c.code as ccode, c.name as cname
from SOURCE.result r from ${stats_db_name}.result r
join SOURCE.result_organization ro on ro.id=r.id join ${stats_db_name}.result_organization ro on ro.id=r.id
join SOURCE.organization o on o.id=ro.organization join ${stats_db_name}.organization o on o.id=ro.organization
join SOURCE.country c on c.code=o.country and c.continent_name='Europe' join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe'
left outer join SOURCE.result_licenses rl on rl.id=r.id left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id
left outer join SOURCE.result_pids pids on pids.id=r.id left outer join ${stats_db_name}.result_pids pids on pids.id=r.id
left outer join SOURCE.result_cc_licence rln on rln.id=r.id left outer join ${stats_db_name}.result_cc_licence rln on rln.id=r.id
left outer join SOURCE.result_projectcount rpc on rpc.id=r.id left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id
left outer join SOURCE.result_fundercount rfc on rfc.id=r.id left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, o.name, c.code, c.name; cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, o.name, c.code, c.name;
create table TARGET.result_affiliated_funder stored as parquet as create table ${observatory_db_name}.result_affiliated_funder stored as parquet as
select select
count(distinct r.id) as total, count(distinct r.id) as total,
r.green, r.green,
@ -231,22 +231,22 @@ select
rfc.count > 1 as multiple_funders, rfc.count > 1 as multiple_funders,
r.type, r.type,
p.funder as pfunder p.funder as pfunder
from SOURCE.result r from ${stats_db_name}.result r
join SOURCE.result_organization ro on ro.id=r.id join ${stats_db_name}.result_organization ro on ro.id=r.id
join SOURCE.organization o on o.id=ro.organization join ${stats_db_name}.organization o on o.id=ro.organization
join SOURCE.country c on c.code=o.country and c.continent_name='Europe' join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe'
join SOURCE.result_projects rp on rp.id=r.id join ${stats_db_name}.result_projects rp on rp.id=r.id
join SOURCE.project p on p.id=rp.project join ${stats_db_name}.project p on p.id=rp.project
left outer join SOURCE.result_licenses rl on rl.id=r.id left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id
left outer join SOURCE.result_pids pids on pids.id=r.id left outer join ${stats_db_name}.result_pids pids on pids.id=r.id
left outer join SOURCE.result_cc_licence rln on rln.id=r.id left outer join ${stats_db_name}.result_cc_licence rln on rln.id=r.id
left outer join SOURCE.result_projectcount rpc on rpc.id=r.id left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id
left outer join SOURCE.result_fundercount rfc on rfc.id=r.id left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, p.funder; cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, p.funder;
create table TARGET.result_affiliated_funder_country stored as parquet as create table ${observatory_db_name}.result_affiliated_funder_country stored as parquet as
select select
count(distinct r.id) as total, count(distinct r.id) as total,
r.green, r.green,
@ -262,22 +262,22 @@ select
rfc.count > 1 as multiple_funders, rfc.count > 1 as multiple_funders,
r.type, r.type,
p.funder as pfunder, c.code as ccode, c.name as cname p.funder as pfunder, c.code as ccode, c.name as cname
from SOURCE.result r from ${stats_db_name}.result r
join SOURCE.result_organization ro on ro.id=r.id join ${stats_db_name}.result_organization ro on ro.id=r.id
join SOURCE.organization o on o.id=ro.organization join ${stats_db_name}.organization o on o.id=ro.organization
join SOURCE.country c on c.code=o.country and c.continent_name='Europe' join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe'
join SOURCE.result_projects rp on rp.id=r.id join ${stats_db_name}.result_projects rp on rp.id=r.id
join SOURCE.project p on p.id=rp.project join ${stats_db_name}.project p on p.id=rp.project
left outer join SOURCE.result_licenses rl on rl.id=r.id left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id
left outer join SOURCE.result_pids pids on pids.id=r.id left outer join ${stats_db_name}.result_pids pids on pids.id=r.id
left outer join SOURCE.result_cc_licence rln on rln.id=r.id left outer join ${stats_db_name}.result_cc_licence rln on rln.id=r.id
left outer join SOURCE.result_projectcount rpc on rpc.id=r.id left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id
left outer join SOURCE.result_fundercount rfc on rfc.id=r.id left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, p.funder, c.code, c.name; cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, p.funder, c.code, c.name;
create table TARGET.result_deposited_country stored as parquet as create table ${observatory_db_name}.result_deposited_country stored as parquet as
select select
count(distinct r.id) as total, count(distinct r.id) as total,
r.green, r.green,
@ -293,22 +293,22 @@ select
rfc.count > 1 as multiple_funders, rfc.count > 1 as multiple_funders,
r.type, r.type,
c.code as ccode, c.name as cname c.code as ccode, c.name as cname
from SOURCE.result r from ${stats_db_name}.result r
join SOURCE.result_datasources rd on rd.id=r.id join ${stats_db_name}.result_datasources rd on rd.id=r.id
join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') join ${stats_db_name}.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
join SOURCE.datasource_organizations dor on dor.id=d.id join ${stats_db_name}.datasource_organizations dor on dor.id=d.id
join SOURCE.organization o on o.id=dor.organization join ${stats_db_name}.organization o on o.id=dor.organization
join SOURCE.country c on c.code=o.country and c.continent_name='Europe' join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe'
left outer join SOURCE.result_licenses rl on rl.id=r.id left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id
left outer join SOURCE.result_pids pids on pids.id=r.id left outer join ${stats_db_name}.result_pids pids on pids.id=r.id
left outer join SOURCE.result_cc_licence rln on rln.id=r.id left outer join ${stats_db_name}.result_cc_licence rln on rln.id=r.id
left outer join SOURCE.result_projectcount rpc on rpc.id=r.id left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id
left outer join SOURCE.result_fundercount rfc on rfc.id=r.id left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, c.code, c.name; cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, c.code, c.name;
create table TARGET.result_deposited_year stored as parquet as create table ${observatory_db_name}.result_deposited_year stored as parquet as
select select
count(distinct r.id) as total, count(distinct r.id) as total,
r.green, r.green,
@ -324,22 +324,22 @@ select
rfc.count > 1 as multiple_funders, rfc.count > 1 as multiple_funders,
r.type, r.type,
r.year r.year
from SOURCE.result r from ${stats_db_name}.result r
join SOURCE.result_datasources rd on rd.id=r.id join ${stats_db_name}.result_datasources rd on rd.id=r.id
join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') join ${stats_db_name}.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
join SOURCE.datasource_organizations dor on dor.id=d.id join ${stats_db_name}.datasource_organizations dor on dor.id=d.id
join SOURCE.organization o on o.id=dor.organization join ${stats_db_name}.organization o on o.id=dor.organization
join SOURCE.country c on c.code=o.country and c.continent_name='Europe' join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe'
left outer join SOURCE.result_licenses rl on rl.id=r.id left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id
left outer join SOURCE.result_pids pids on pids.id=r.id left outer join ${stats_db_name}.result_pids pids on pids.id=r.id
left outer join SOURCE.result_cc_licence rln on rln.id=r.id left outer join ${stats_db_name}.result_cc_licence rln on rln.id=r.id
left outer join SOURCE.result_projectcount rpc on rpc.id=r.id left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id
left outer join SOURCE.result_fundercount rfc on rfc.id=r.id left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, r.year; cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, r.year;
create table TARGET.result_deposited_year_country stored as parquet as create table ${observatory_db_name}.result_deposited_year_country stored as parquet as
select select
count(distinct r.id) as total, count(distinct r.id) as total,
r.green, r.green,
@ -355,22 +355,22 @@ select
rfc.count > 1 as multiple_funders, rfc.count > 1 as multiple_funders,
r.type, r.type,
r.year, c.code as ccode, c.name as cname r.year, c.code as ccode, c.name as cname
from SOURCE.result r from ${stats_db_name}.result r
join SOURCE.result_datasources rd on rd.id=r.id join ${stats_db_name}.result_datasources rd on rd.id=r.id
join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') join ${stats_db_name}.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
join SOURCE.datasource_organizations dor on dor.id=d.id join ${stats_db_name}.datasource_organizations dor on dor.id=d.id
join SOURCE.organization o on o.id=dor.organization join ${stats_db_name}.organization o on o.id=dor.organization
join SOURCE.country c on c.code=o.country and c.continent_name='Europe' join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe'
left outer join SOURCE.result_licenses rl on rl.id=r.id left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id
left outer join SOURCE.result_pids pids on pids.id=r.id left outer join ${stats_db_name}.result_pids pids on pids.id=r.id
left outer join SOURCE.result_cc_licence rln on rln.id=r.id left outer join ${stats_db_name}.result_cc_licence rln on rln.id=r.id
left outer join SOURCE.result_projectcount rpc on rpc.id=r.id left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id
left outer join SOURCE.result_fundercount rfc on rfc.id=r.id left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, r.year, c.code, c.name; cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, r.year, c.code, c.name;
-- result_deposited_datasource (parquet table in the observatory database).
-- One row per combination of the grouped result attributes and datasource name (dname).
-- total is the number of distinct result ids in each group.
-- The inner joins keep only results linked to a datasource whose type is one of the four
-- repository types listed in the d.type filter and whose organization has a country with
-- continent_name equal to Europe.
-- Licence, pid, cc_licence, project count and funder count tables are left-joined, so results
-- with no rows in those tables are still counted.
-- NOTE(review): result_cc_licence is read from the stats database here, while the start of this
-- script creates a table of the same name in the observatory database - confirm the intended source.
create table TARGET.result_deposited_datasource stored as parquet as create table ${observatory_db_name}.result_deposited_datasource stored as parquet as
select select
count(distinct r.id) as total, count(distinct r.id) as total,
r.green, r.green,
@ -386,22 +386,22 @@ select
rfc.count > 1 as multiple_funders, rfc.count > 1 as multiple_funders,
r.type, r.type,
d.name as dname d.name as dname
from SOURCE.result r from ${stats_db_name}.result r
join SOURCE.result_datasources rd on rd.id=r.id join ${stats_db_name}.result_datasources rd on rd.id=r.id
join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') join ${stats_db_name}.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
join SOURCE.datasource_organizations dor on dor.id=d.id join ${stats_db_name}.datasource_organizations dor on dor.id=d.id
join SOURCE.organization o on o.id=dor.organization join ${stats_db_name}.organization o on o.id=dor.organization
join SOURCE.country c on c.code=o.country and c.continent_name='Europe' join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe'
left outer join SOURCE.result_licenses rl on rl.id=r.id left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id
left outer join SOURCE.result_pids pids on pids.id=r.id left outer join ${stats_db_name}.result_pids pids on pids.id=r.id
left outer join SOURCE.result_cc_licence rln on rln.id=r.id left outer join ${stats_db_name}.result_cc_licence rln on rln.id=r.id
left outer join SOURCE.result_projectcount rpc on rpc.id=r.id left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id
left outer join SOURCE.result_fundercount rfc on rfc.id=r.id left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, d.name; cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, d.name;
-- result_deposited_datasource_country (parquet table in the observatory database).
-- One row per combination of the grouped result attributes, datasource name (dname) and
-- country (ccode, cname). total is the number of distinct result ids in each group.
-- The inner joins keep only results linked to a datasource whose type is one of the four
-- repository types listed in the d.type filter and whose organization has a country with
-- continent_name equal to Europe. The country columns come from that organization.
-- Licence, pid, cc_licence, project count and funder count tables are left-joined, so results
-- with no rows in those tables are still counted.
create table TARGET.result_deposited_datasource_country stored as parquet as create table ${observatory_db_name}.result_deposited_datasource_country stored as parquet as
select select
count(distinct r.id) as total, count(distinct r.id) as total,
r.green, r.green,
@ -417,22 +417,22 @@ select
rfc.count > 1 as multiple_funders, rfc.count > 1 as multiple_funders,
r.type, r.type,
d.name as dname, c.code as ccode, c.name as cname d.name as dname, c.code as ccode, c.name as cname
from SOURCE.result r from ${stats_db_name}.result r
join SOURCE.result_datasources rd on rd.id=r.id join ${stats_db_name}.result_datasources rd on rd.id=r.id
join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') join ${stats_db_name}.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
join SOURCE.datasource_organizations dor on dor.id=d.id join ${stats_db_name}.datasource_organizations dor on dor.id=d.id
join SOURCE.organization o on o.id=dor.organization join ${stats_db_name}.organization o on o.id=dor.organization
join SOURCE.country c on c.code=o.country and c.continent_name='Europe' join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe'
left outer join SOURCE.result_licenses rl on rl.id=r.id left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id
left outer join SOURCE.result_pids pids on pids.id=r.id left outer join ${stats_db_name}.result_pids pids on pids.id=r.id
left outer join SOURCE.result_cc_licence rln on rln.id=r.id left outer join ${stats_db_name}.result_cc_licence rln on rln.id=r.id
left outer join SOURCE.result_projectcount rpc on rpc.id=r.id left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id
left outer join SOURCE.result_fundercount rfc on rfc.id=r.id left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, d.name, c.code, c.name; cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, d.name, c.code, c.name;
-- result_deposited_organization (parquet table in the observatory database).
-- One row per combination of the grouped result attributes and organization name (oname).
-- total is the number of distinct result ids in each group.
-- The organization is the one attached to the datasource of the result (via
-- datasource_organizations), and only datasources of the four repository types in the d.type
-- filter are considered. The country join keeps only organizations whose country has
-- continent_name equal to Europe.
-- Licence, pid, cc_licence, project count and funder count tables are left-joined, so results
-- with no rows in those tables are still counted.
create table TARGET.result_deposited_organization stored as parquet as create table ${observatory_db_name}.result_deposited_organization stored as parquet as
select select
count(distinct r.id) as total, count(distinct r.id) as total,
r.green, r.green,
@ -448,22 +448,22 @@ select
rfc.count > 1 as multiple_funders, rfc.count > 1 as multiple_funders,
r.type, r.type,
o.name as oname o.name as oname
from SOURCE.result r from ${stats_db_name}.result r
join SOURCE.result_datasources rd on rd.id=r.id join ${stats_db_name}.result_datasources rd on rd.id=r.id
join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') join ${stats_db_name}.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
join SOURCE.datasource_organizations dor on dor.id=d.id join ${stats_db_name}.datasource_organizations dor on dor.id=d.id
join SOURCE.organization o on o.id=dor.organization join ${stats_db_name}.organization o on o.id=dor.organization
join SOURCE.country c on c.code=o.country and c.continent_name='Europe' join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe'
left outer join SOURCE.result_licenses rl on rl.id=r.id left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id
left outer join SOURCE.result_pids pids on pids.id=r.id left outer join ${stats_db_name}.result_pids pids on pids.id=r.id
left outer join SOURCE.result_cc_licence rln on rln.id=r.id left outer join ${stats_db_name}.result_cc_licence rln on rln.id=r.id
left outer join SOURCE.result_projectcount rpc on rpc.id=r.id left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id
left outer join SOURCE.result_fundercount rfc on rfc.id=r.id left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, o.name; cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, o.name;
-- result_deposited_organization_country (parquet table in the observatory database).
-- One row per combination of the grouped result attributes, organization name (oname) and
-- country (ccode, cname). total is the number of distinct result ids in each group.
-- The organization is the one attached to the datasource of the result (via
-- datasource_organizations), and only datasources of the four repository types in the d.type
-- filter are considered. The country join keeps only organizations whose country has
-- continent_name equal to Europe.
-- Licence, pid, cc_licence, project count and funder count tables are left-joined, so results
-- with no rows in those tables are still counted.
create table TARGET.result_deposited_organization_country stored as parquet as create table ${observatory_db_name}.result_deposited_organization_country stored as parquet as
select select
count(distinct r.id) as total, count(distinct r.id) as total,
r.green, r.green,
@ -479,22 +479,22 @@ select
rfc.count > 1 as multiple_funders, rfc.count > 1 as multiple_funders,
r.type, r.type,
o.name as oname, c.code as ccode, c.name as cname o.name as oname, c.code as ccode, c.name as cname
from SOURCE.result r from ${stats_db_name}.result r
join SOURCE.result_datasources rd on rd.id=r.id join ${stats_db_name}.result_datasources rd on rd.id=r.id
join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') join ${stats_db_name}.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
join SOURCE.datasource_organizations dor on dor.id=d.id join ${stats_db_name}.datasource_organizations dor on dor.id=d.id
join SOURCE.organization o on o.id=dor.organization join ${stats_db_name}.organization o on o.id=dor.organization
join SOURCE.country c on c.code=o.country and c.continent_name='Europe' join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe'
left outer join SOURCE.result_licenses rl on rl.id=r.id left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id
left outer join SOURCE.result_pids pids on pids.id=r.id left outer join ${stats_db_name}.result_pids pids on pids.id=r.id
left outer join SOURCE.result_cc_licence rln on rln.id=r.id left outer join ${stats_db_name}.result_cc_licence rln on rln.id=r.id
left outer join SOURCE.result_projectcount rpc on rpc.id=r.id left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id
left outer join SOURCE.result_fundercount rfc on rfc.id=r.id left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, o.name, c.code, c.name; cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, o.name, c.code, c.name;
-- result_deposited_funder (parquet table in the observatory database).
-- One row per combination of the grouped result attributes and project funder (pfunder).
-- total is the number of distinct result ids in each group.
-- On top of the datasource, organization and European country inner joins, this query
-- inner-joins result_projects and project, so only results linked to at least one project
-- are counted. A result linked to projects of several funders contributes to each funder group.
-- Licence, pid, cc_licence, project count and funder count tables are left-joined, so results
-- with no rows in those tables are still counted.
create table TARGET.result_deposited_funder stored as parquet as create table ${observatory_db_name}.result_deposited_funder stored as parquet as
select select
count(distinct r.id) as total, count(distinct r.id) as total,
r.green, r.green,
@ -510,24 +510,24 @@ select
rfc.count > 1 as multiple_funders, rfc.count > 1 as multiple_funders,
r.type, r.type,
p.funder as pfunder p.funder as pfunder
from SOURCE.result r from ${stats_db_name}.result r
join SOURCE.result_datasources rd on rd.id=r.id join ${stats_db_name}.result_datasources rd on rd.id=r.id
join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') join ${stats_db_name}.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
join SOURCE.datasource_organizations dor on dor.id=d.id join ${stats_db_name}.datasource_organizations dor on dor.id=d.id
join SOURCE.organization o on o.id=dor.organization join ${stats_db_name}.organization o on o.id=dor.organization
join SOURCE.country c on c.code=o.country and c.continent_name='Europe' join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe'
join SOURCE.result_projects rp on rp.id=r.id join ${stats_db_name}.result_projects rp on rp.id=r.id
join SOURCE.project p on p.id=rp.project join ${stats_db_name}.project p on p.id=rp.project
left outer join SOURCE.result_licenses rl on rl.id=r.id left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id
left outer join SOURCE.result_pids pids on pids.id=r.id left outer join ${stats_db_name}.result_pids pids on pids.id=r.id
left outer join SOURCE.result_cc_licence rln on rln.id=r.id left outer join ${stats_db_name}.result_cc_licence rln on rln.id=r.id
left outer join SOURCE.result_projectcount rpc on rpc.id=r.id left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id
left outer join SOURCE.result_fundercount rfc on rfc.id=r.id left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, p.funder; cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, p.funder;
-- result_deposited_funder_country (parquet table in the observatory database).
-- One row per combination of the grouped result attributes, project funder (pfunder) and
-- country (ccode, cname). total is the number of distinct result ids in each group.
-- On top of the datasource, organization and European country inner joins, this query
-- inner-joins result_projects and project, so only results linked to at least one project
-- are counted. The country is that of the organization attached to the datasource, not of the funder.
-- Licence, pid, cc_licence, project count and funder count tables are left-joined, so results
-- with no rows in those tables are still counted.
create table TARGET.result_deposited_funder_country stored as parquet as create table ${observatory_db_name}.result_deposited_funder_country stored as parquet as
select select
count(distinct r.id) as total, count(distinct r.id) as total,
r.green, r.green,
@ -543,38 +543,19 @@ select
rfc.count > 1 as multiple_funders, rfc.count > 1 as multiple_funders,
r.type, r.type,
p.funder as pfunder, c.code as ccode, c.name as cname p.funder as pfunder, c.code as ccode, c.name as cname
from SOURCE.result r from ${stats_db_name}.result r
join SOURCE.result_datasources rd on rd.id=r.id join ${stats_db_name}.result_datasources rd on rd.id=r.id
join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') join ${stats_db_name}.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
join SOURCE.datasource_organizations dor on dor.id=d.id join ${stats_db_name}.datasource_organizations dor on dor.id=d.id
join SOURCE.organization o on o.id=dor.organization join ${stats_db_name}.organization o on o.id=dor.organization
join SOURCE.country c on c.code=o.country and c.continent_name='Europe' join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe'
join SOURCE.result_projects rp on rp.id=r.id join ${stats_db_name}.result_projects rp on rp.id=r.id
join SOURCE.project p on p.id=rp.project join ${stats_db_name}.project p on p.id=rp.project
left outer join SOURCE.result_licenses rl on rl.id=r.id left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id
left outer join SOURCE.result_pids pids on pids.id=r.id left outer join ${stats_db_name}.result_pids pids on pids.id=r.id
left outer join SOURCE.result_cc_licence rln on rln.id=r.id left outer join ${stats_db_name}.result_cc_licence rln on rln.id=r.id
left outer join SOURCE.result_projectcount rpc on rpc.id=r.id left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id
left outer join SOURCE.result_fundercount rfc on rfc.id=r.id left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, p.funder, c.code, c.name; cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, p.funder, c.code, c.name;
-- compute stats TARGET.result_affiliated_country;
-- compute stats TARGET.result_affiliated_year;
-- compute stats TARGET.result_affiliated_year_country;
-- compute stats TARGET.result_affiliated_datasource;
-- compute stats TARGET.result_affiliated_datasource_country;
-- compute stats TARGET.result_affiliated_organization;
-- compute stats TARGET.result_affiliated_organization_country;
-- compute stats TARGET.result_affiliated_funder;
-- compute stats TARGET.result_affiliated_funder_country;
-- compute stats TARGET.result_deposited_country;
-- compute stats TARGET.result_deposited_year;
-- compute stats TARGET.result_deposited_year_country;
-- compute stats TARGET.result_deposited_datasource;
-- compute stats TARGET.result_deposited_datasource_country;
-- compute stats TARGET.result_deposited_organization;
-- compute stats TARGET.result_deposited_organization_country;
-- compute stats TARGET.result_deposited_funder;
-- compute stats TARGET.result_deposited_funder_country;

View File

@ -326,20 +326,44 @@
<argument>${wf:appPath()}/scripts/step20-createMonitorDB.sql</argument> <argument>${wf:appPath()}/scripts/step20-createMonitorDB.sql</argument>
<file>monitor.sh</file> <file>monitor.sh</file>
</shell> </shell>
<ok to="step21-createObservatoryDB-pre"/>
<error to="Kill"/>
</action>
<!-- Shell action run before the observatory Hive script. Executes observatory-pre.sh with three
     positional arguments: the stats DB name, the observatory DB name and the observatory shadow
     DB name. On success the workflow continues to step21-createObservatoryDB, on failure it
     goes to Kill.
     NOTE(review): the script body is presumably the one that drops and recreates the observatory
     database and creates one view per stats table - confirm against observatory-pre.sh. -->
<action name="step21-createObservatoryDB-pre">
<shell xmlns="uri:oozie:shell-action:0.1">
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<exec>observatory-pre.sh</exec>
<argument>${stats_db_name}</argument>
<argument>${observatory_db_name}</argument>
<argument>${observatory_db_shadow_name}</argument>
<file>observatory-pre.sh</file>
</shell>
<ok to="step21-createObservatoryDB"/> <ok to="step21-createObservatoryDB"/>
<error to="Kill"/> <error to="Kill"/>
</action> </action>
<!-- Hive2 action that runs scripts/step21-createObservatoryDB.sql against the JDBC endpoint in
     hive_jdbc_url. The two param entries supply the stats_db_name and observatory_db_name
     variables that the script references. On success the workflow continues to
     step21-createObservatoryDB-post, on failure it goes to Kill. -->
<action name="step21-createObservatoryDB"> <action name="step21-createObservatoryDB">
<hive2 xmlns="uri:oozie:hive2-action:0.1">
<jdbc-url>${hive_jdbc_url}</jdbc-url>
<script>scripts/step21-createObservatoryDB.sql</script>
<param>stats_db_name=${stats_db_name}</param>
<param>observatory_db_name=${observatory_db_name}</param>
</hive2>
<ok to="step21-createObservatoryDB-post"/>
<error to="Kill"/>
</action>
<action name="step21-createObservatoryDB-post">
<shell xmlns="uri:oozie:shell-action:0.1"> <shell xmlns="uri:oozie:shell-action:0.1">
<job-tracker>${jobTracker}</job-tracker> <job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node> <name-node>${nameNode}</name-node>
<exec>observatory.sh</exec> <exec>observatory-post.sh</exec>
<argument>${stats_db_name}</argument> <argument>${stats_db_name}</argument>
<argument>${observatory_db_name}</argument> <argument>${observatory_db_name}</argument>
<argument>${observatory_db_shadow_name}</argument> <argument>${observatory_db_shadow_name}</argument>
<argument>${wf:appPath()}/scripts/step21-createObservatoryDB.sql</argument> <file>observatory-post.sh</file>
<file>observatory.sh</file>
</shell> </shell>
<ok to="Step22"/> <ok to="Step22"/>
<error to="Kill"/> <error to="Kill"/>