diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/observatory.sh b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/observatory.sh
new file mode 100644
index 000000000..ff03bca03
--- /dev/null
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/observatory.sh
@@ -0,0 +1,28 @@
+export PYTHON_EGG_CACHE=/home/$(whoami)/.python-eggs
+export link_folder=/tmp/impala-shell-python-egg-cache-$(whoami)
+if ! [ -L $link_folder ]
+then
+ rm -Rf "$link_folder"
+ ln -sfn ${PYTHON_EGG_CACHE}${link_folder} ${link_folder}
+fi
+
+export SOURCE=$1
+export TARGET=$2
+export SHADOW=$3
+export SCRIPT_PATH=$4
+
+echo "Getting file from " $4
+hdfs dfs -copyToLocal $4
+
+echo "Creating observatory database"
+impala-shell -q "drop database if exists ${TARGET} cascade"
+impala-shell -q "create database if not exists ${TARGET}"
+impala-shell -d ${SOURCE} -q "show tables" --delimited | sed "s/\(.*\)/create view ${TARGET}.\1 as select * from ${SOURCE}.\1;/" | impala-shell -f -
+cat step21-createObservatoryDB.sql | sed s/SOURCE/$1/g | sed s/TARGET/$2/g1 | impala-shell -f -
+echo "Impala shell finished"
+
+echo "Updating shadow observatory database"
+impala-shell -q "create database if not exists ${SHADOW}"
+impala-shell -d ${SHADOW} -q "show tables" --delimited | sed "s/^/drop view if exists ${SHADOW}./" | sed "s/$/;/" | impala-shell -f -
+impala-shell -d ${TARGET} -q "show tables" --delimited | sed "s/\(.*\)/create view ${SHADOW}.\1 as select * from ${TARGET}.\1;/" | impala-shell -f -
+echo "Shadow db ready!"
\ No newline at end of file
diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step21-createObservatoryDB.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step21-createObservatoryDB.sql
new file mode 100644
index 000000000..40cdf3f6d
--- /dev/null
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step21-createObservatoryDB.sql
@@ -0,0 +1,259 @@
+create table TARGET.result_affiliated_country stored as parquet as
+select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
+ case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
+ r.peer_reviewed, r.type, c.code as ccode, c.name as cname
+from SOURCE.result r
+join SOURCE.result_organization ro on ro.id=r.id
+join SOURCE.organization o on o.id=ro.organization
+join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
+left outer join SOURCE.result_licenses rl on rl.id=r.id
+left outer join SOURCE.result_pids pids on pids.id=r.id
+group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, c.code, c.name;
+
+create table TARGET.result_affiliated_year stored as parquet as
+select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
+ case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, r.peer_reviewed, r.type, r.year
+from SOURCE.result r
+join SOURCE.result_organization ro on ro.id=r.id
+join SOURCE.organization o on o.id=ro.organization
+join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
+left outer join SOURCE.result_licenses rl on rl.id=r.id
+left outer join SOURCE.result_pids pids on pids.id=r.id
+group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, r.year;
+
+create table TARGET.result_affiliated_year_country stored as parquet as
+select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
+ case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
+ r.peer_reviewed, r.type, r.year, c.code as ccode, c.name as cname
+from SOURCE.result r
+join SOURCE.result_organization ro on ro.id=r.id
+join SOURCE.organization o on o.id=ro.organization
+join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
+left outer join SOURCE.result_licenses rl on rl.id=r.id
+left outer join SOURCE.result_pids pids on pids.id=r.id
+group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, r.year, c.code, c.name;
+
+create table TARGET.result_affiliated_datasource stored as parquet as
+select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
+ case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, r.peer_reviewed, r.type, d.name as dname
+from SOURCE.result r
+join SOURCE.result_organization ro on ro.id=r.id
+join SOURCE.organization o on o.id=ro.organization
+join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
+left outer join SOURCE.result_datasources rd on rd.id=r.id
+left outer join SOURCE.datasource d on d.id=rd.datasource
+left outer join SOURCE.result_licenses rl on rl.id=r.id
+left outer join SOURCE.result_pids pids on pids.id=r.id
+group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, d.name;
+
+create table TARGET.result_affiliated_datasource_country stored as parquet as
+select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
+ case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
+ r.peer_reviewed, r.type, d.name as dname, c.code as ccode, c.name as cname
+from SOURCE.result r
+join SOURCE.result_organization ro on ro.id=r.id
+join SOURCE.organization o on o.id=ro.organization
+join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
+left outer join SOURCE.result_datasources rd on rd.id=r.id
+left outer join SOURCE.datasource d on d.id=rd.datasource
+left outer join SOURCE.result_licenses rl on rl.id=r.id
+left outer join SOURCE.result_pids pids on pids.id=r.id
+group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, d.name, c.code, c.name;
+
+create table TARGET.result_affiliated_organization stored as parquet as
+select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
+ case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
+ r.peer_reviewed, r.type, o.name as oname
+from SOURCE.result r
+join SOURCE.result_organization ro on ro.id=r.id
+join SOURCE.organization o on o.id=ro.organization
+join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
+left outer join SOURCE.result_licenses rl on rl.id=r.id
+left outer join SOURCE.result_pids pids on pids.id=r.id
+group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, o.name;
+
+create table TARGET.result_affiliated_organization_country stored as parquet as
+select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
+ case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
+ r.peer_reviewed, r.type, o.name as oname, c.code as ccode, c.name as cname
+from SOURCE.result r
+join SOURCE.result_organization ro on ro.id=r.id
+join SOURCE.organization o on o.id=ro.organization
+join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
+left outer join SOURCE.result_licenses rl on rl.id=r.id
+left outer join SOURCE.result_pids pids on pids.id=r.id
+group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, o.name, c.code, c.name;
+
+create table TARGET.result_affiliated_funder stored as parquet as
+select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
+ case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, r.peer_reviewed, r.type, p.funder as pfunder
+from SOURCE.result r
+join SOURCE.result_organization ro on ro.id=r.id
+join SOURCE.organization o on o.id=ro.organization
+join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
+join SOURCE.result_projects rp on rp.id=r.id
+join SOURCE.project p on p.id=rp.project
+left outer join SOURCE.result_licenses rl on rl.id=r.id
+left outer join SOURCE.result_pids pids on pids.id=r.id
+group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, p.funder;
+
+create table TARGET.result_affiliated_funder_country stored as parquet as
+select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
+ case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
+ r.peer_reviewed, r.type, p.funder as pfunder, c.code as ccode, c.name as cname
+from SOURCE.result r
+join SOURCE.result_organization ro on ro.id=r.id
+join SOURCE.organization o on o.id=ro.organization
+join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
+join SOURCE.result_projects rp on rp.id=r.id
+join SOURCE.project p on p.id=rp.project
+left outer join SOURCE.result_licenses rl on rl.id=r.id
+left outer join SOURCE.result_pids pids on pids.id=r.id
+group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, p.funder, c.code, c.name;
+
+create table TARGET.result_deposited_country stored as parquet as
+select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
+ case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
+ r.peer_reviewed, r.type, c.code as ccode, c.name as cname
+from SOURCE.result r
+join SOURCE.result_datasources rd on rd.id=r.id
+join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
+join SOURCE.datasource_organizations dor on dor.id=d.id
+join SOURCE.organization o on o.id=dor.organization
+join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
+left outer join SOURCE.result_licenses rl on rl.id=r.id
+left outer join SOURCE.result_pids pids on pids.id=r.id
+group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, c.code, c.name;
+
+create table TARGET.result_deposited_year stored as parquet as
+select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
+ case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, r.peer_reviewed, r.type, r.year
+from SOURCE.result r
+join SOURCE.result_datasources rd on rd.id=r.id
+join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
+join SOURCE.datasource_organizations dor on dor.id=d.id
+join SOURCE.organization o on o.id=dor.organization
+join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
+left outer join SOURCE.result_licenses rl on rl.id=r.id
+left outer join SOURCE.result_pids pids on pids.id=r.id
+group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, r.year;
+
+create table TARGET.result_deposited_year_country stored as parquet as
+select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
+ case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
+ r.peer_reviewed, r.type, r.year, c.code as ccode, c.name as cname
+from SOURCE.result r
+join SOURCE.result_datasources rd on rd.id=r.id
+join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
+join SOURCE.datasource_organizations dor on dor.id=d.id
+join SOURCE.organization o on o.id=dor.organization
+join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
+left outer join SOURCE.result_licenses rl on rl.id=r.id
+left outer join SOURCE.result_pids pids on pids.id=r.id
+group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, r.year, c.code, c.name;
+
+create table TARGET.result_deposited_datasource stored as parquet as
+select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
+ case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
+ r.peer_reviewed, r.type, d.name as dname
+from SOURCE.result r
+join SOURCE.result_datasources rd on rd.id=r.id
+join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
+join SOURCE.datasource_organizations dor on dor.id=d.id
+join SOURCE.organization o on o.id=dor.organization
+join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
+left outer join SOURCE.result_licenses rl on rl.id=r.id
+left outer join SOURCE.result_pids pids on pids.id=r.id
+group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, d.name;
+
+create table TARGET.result_deposited_datasource_country stored as parquet as
+select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
+ case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
+ r.peer_reviewed, r.type, d.name as dname, c.code as ccode, c.name as cname
+from SOURCE.result r
+join SOURCE.result_datasources rd on rd.id=r.id
+join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
+join SOURCE.datasource_organizations dor on dor.id=d.id
+join SOURCE.organization o on o.id=dor.organization
+join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
+left outer join SOURCE.result_licenses rl on rl.id=r.id
+left outer join SOURCE.result_pids pids on pids.id=r.id
+group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, d.name, c.code, c.name;
+
+create table TARGET.result_deposited_organization stored as parquet as
+select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
+ case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, r.peer_reviewed, r.type, o.name as oname
+from SOURCE.result r
+join SOURCE.result_datasources rd on rd.id=r.id
+join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
+join SOURCE.datasource_organizations dor on dor.id=d.id
+join SOURCE.organization o on o.id=dor.organization
+join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
+left outer join SOURCE.result_licenses rl on rl.id=r.id
+left outer join SOURCE.result_pids pids on pids.id=r.id
+group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, o.name;
+
+create table TARGET.result_deposited_organization_country stored as parquet as
+select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
+ case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
+ r.peer_reviewed, r.type, o.name as oname, c.code as ccode, c.name as cname
+from SOURCE.result r
+join SOURCE.result_datasources rd on rd.id=r.id
+join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
+join SOURCE.datasource_organizations dor on dor.id=d.id
+join SOURCE.organization o on o.id=dor.organization
+join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
+left outer join SOURCE.result_licenses rl on rl.id=r.id
+left outer join SOURCE.result_pids pids on pids.id=r.id
+group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, o.name, c.code, c.name;
+
+create table TARGET.result_deposited_funder stored as parquet as
+select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
+ case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
+ r.peer_reviewed, r.type, p.funder as pfunder
+from SOURCE.result r
+join SOURCE.result_datasources rd on rd.id=r.id
+join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
+join SOURCE.datasource_organizations dor on dor.id=d.id
+join SOURCE.organization o on o.id=dor.organization
+join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
+join SOURCE.result_projects rp on rp.id=r.id
+join SOURCE.project p on p.id=rp.project
+left outer join SOURCE.result_licenses rl on rl.id=r.id
+left outer join SOURCE.result_pids pids on pids.id=r.id
+group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, p.funder;
+
+create table TARGET.result_deposited_funder_country stored as parquet as
+select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
+ case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
+ r.peer_reviewed, r.type, p.funder as pfunder, c.code as ccode, c.name as cname
+from SOURCE.result r
+join SOURCE.result_datasources rd on rd.id=r.id
+join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
+join SOURCE.datasource_organizations dor on dor.id=d.id
+join SOURCE.organization o on o.id=dor.organization
+join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
+join SOURCE.result_projects rp on rp.id=r.id
+join SOURCE.project p on p.id=rp.project
+left outer join SOURCE.result_licenses rl on rl.id=r.id
+left outer join SOURCE.result_pids pids on pids.id=r.id
+group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, p.funder, c.code, c.name;
+
+compute stats TARGET.result_affiliated_country;
+compute stats TARGET.result_affiliated_year;
+compute stats TARGET.result_affiliated_year_country;
+compute stats TARGET.result_affiliated_datasource;
+compute stats TARGET.result_affiliated_datasource_country;
+compute stats TARGET.result_affiliated_organization;
+compute stats TARGET.result_affiliated_organization_country;
+compute stats TARGET.result_affiliated_funder;
+compute stats TARGET.result_affiliated_funder_country;
+compute stats TARGET.result_deposited_country;
+compute stats TARGET.result_deposited_year;
+compute stats TARGET.result_deposited_year_country;
+compute stats TARGET.result_deposited_datasource;
+compute stats TARGET.result_deposited_datasource_country;
+compute stats TARGET.result_deposited_organization;
+compute stats TARGET.result_deposited_organization_country;
+compute stats TARGET.result_deposited_funder;
+compute stats TARGET.result_deposited_funder_country;
diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml
index 321500e2c..824a8b3c7 100644
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml
@@ -25,6 +25,14 @@
monitor_db_shadow_name
the name of the shadow monitor db
+
+ observatory_db_name
+ the target monitor db name
+
+
+ observatory_db_shadow_name
+ the name of the shadow monitor db
+
stats_tool_api_url
The url of the API of the stats tool. Is used to trigger the cache update.
@@ -305,11 +313,26 @@
${wf:appPath()}/scripts/step20-createMonitorDB.sql
monitor.sh
-
+
-
+
+
+ ${jobTracker}
+ ${nameNode}
+ observatory.sh
+ ${stats_db_name}
+ ${observatory_db_name}
+ ${observatory_db_shadow_name}
+ ${wf:appPath()}/scripts/step21-createObservatoryDB.sql
+ observatory.sh
+
+
+
+
+
+
${jobTracker}
${nameNode}
@@ -322,4 +345,4 @@
-
+
\ No newline at end of file