diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/finalizedb.sh b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/finalizedb.sh
index 60771dfa7..9de472955 100644
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/finalizedb.sh
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/finalizedb.sh
@@ -11,10 +11,4 @@ export SHADOW=$2
echo "Updating shadow database"
hive --database ${SOURCE} -e "show tables" | grep -v WARN | sed "s/^\(.*\)/analyze table ${SOURCE}.\1 compute statistics;/" > foo
-hive -f foo
-hive -e "create database if not exists ${SHADOW}"
-hive --database ${SHADOW} -e "show tables" | grep -v WARN | sed "s/^/drop view if exists ${SHADOW}./" | sed "s/$/;/" > foo
-hive -f foo
-hive --database ${SOURCE} -e "show tables" | grep -v WARN | sed "s/\(.*\)/create view ${SHADOW}.\1 as select * from ${SOURCE}.\1;/" > foo
-hive -f foo
-echo "Shadow db ready!"
\ No newline at end of file
+hive -f foo
\ No newline at end of file
diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/monitor.sh b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/monitor.sh
index 37809652d..a4e7eec57 100644
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/monitor.sh
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/monitor.sh
@@ -17,12 +17,4 @@ hdfs dfs -copyToLocal $4
echo "Creating monitor database"
cat step20-createMonitorDB.sql | sed s/SOURCE/$1/g | sed s/TARGET/$2/g1 > foo
hive -f foo
-echo "Impala shell finished"
-
-echo "Updating shadow monitor database"
-hive -e "create database if not exists ${SHADOW}"
-hive --database ${SHADOW} -e "show tables" | grep -v WARN | sed "s/^/drop view if exists ${SHADOW}./" | sed "s/$/;/" > foo
-hive -f foo
-hive --database ${TARGET} -e "show tables" | grep -v WARN | sed "s/\(.*\)/create view ${SHADOW}.\1 as select * from ${TARGET}.\1;/" > foo
-hive -f foo
-echo "Shadow db ready!"
\ No newline at end of file
+echo "Impala shell finished"
\ No newline at end of file
diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/observatory-post.sh b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/observatory-post.sh
index d074e6a55..12315c9e8 100644
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/observatory-post.sh
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/observatory-post.sh
@@ -12,12 +12,4 @@ export SHADOW=$3
hive --database ${TARGET} -e "show tables" | grep -v WARN | sed "s/\(.*\)/analyze table ${TARGET}.\1 compute statistics;/" > foo
hive -f foo
-echo "Impala shell finished"
-
-echo "Updating shadow observatory database"
-hive -e "create database if not exists ${SHADOW}"
-hive --database ${SHADOW} -e "show tables" | grep -v WARN | sed "s/^/drop view if exists ${SHADOW}./" | sed "s/$/;/" > foo
-hive -f foo
-hive -d ${TARGET} -e "show tables" | grep -v WARN | sed "s/\(.*\)/create view ${SHADOW}.\1 as select * from ${TARGET}.\1;/" > foo
-hive -f foo
-echo "Shadow db ready!"
\ No newline at end of file
+echo "Impala shell finished"
\ No newline at end of file
diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step8.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step8.sql
index 6753d8190..33e1e3527 100644
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step8.sql
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step8.sql
@@ -48,12 +48,10 @@ WHERE d1.datainfo.deletedbyinference = FALSE;
-- Updating temporary table with everything that is not based on results -> This is done with the following "dual" table.
-- Creating a temporary dual table that will be removed after the following insert
-CREATE TABLE ${stats_db_name}.dual
-(
- dummy CHAR(1)
-);
-INSERT INTO ${stats_db_name}.dual
-VALUES ('X');
+CREATE TABLE ${stats_db_name}.dual ( dummy CHAR(1));
+
+INSERT INTO ${stats_db_name}.dual VALUES ('X');
+
INSERT INTO ${stats_db_name}.datasource_tmp (`id`, `name`, `type`, `dateofvalidation`, `yearofvalidation`, `harvested`,
`piwik_id`, `latitude`, `longitude`, `websiteurl`, `compatibility`, `issn_printed`, `issn_online`)
SELECT 'other',
@@ -73,12 +71,8 @@ FROM ${stats_db_name}.dual
WHERE 'other' not in (SELECT id FROM ${stats_db_name}.datasource_tmp WHERE name = 'Unknown Repository');
DROP TABLE ${stats_db_name}.dual;
-UPDATE ${stats_db_name}.datasource_tmp
-SET name='Other'
-WHERE name = 'Unknown Repository';
-UPDATE ${stats_db_name}.datasource_tmp
-SET yearofvalidation=null
-WHERE yearofvalidation = '-1';
+UPDATE ${stats_db_name}.datasource_tmp SET name='Other' WHERE name = 'Unknown Repository';
+UPDATE ${stats_db_name}.datasource_tmp SET yearofvalidation=null WHERE yearofvalidation = '-1';
CREATE TABLE ${stats_db_name}.datasource_languages STORED AS PARQUET AS
SELECT substr(d.id, 4) AS id, langs.languages AS language
@@ -91,8 +85,7 @@ FROM ${openaire_db_name}.datasource d LATERAL VIEW explode(d.originalid) oids AS
CREATE TABLE ${stats_db_name}.datasource_organizations STORED AS PARQUET AS
SELECT substr(r.target, 4) AS id, substr(r.source, 4) AS organization
FROM ${openaire_db_name}.relation r
-WHERE r.reltype = 'datasourceOrganization'
- and r.datainfo.deletedbyinference = false;
+WHERE r.reltype = 'datasourceOrganization' and r.datainfo.deletedbyinference = false;
-- datasource sources:
-- where the datasource info have been collected from.
@@ -101,6 +94,6 @@ select substr(d.id, 4) as id, substr(cf.key, 4) as datasource
from ${openaire_db_name}.datasource d lateral view explode(d.collectedfrom) cfrom as cf
where d.datainfo.deletedbyinference = false;
-CREATE OR REPLACE VIEW ${stats_db_name}.datasource_results STORED AS PARQUET AS
+CREATE OR REPLACE VIEW ${stats_db_name}.datasource_results AS
SELECT datasource AS id, id AS result
FROM ${stats_db_name}.result_datasources;
\ No newline at end of file
diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml
index 08d33f4e8..7ac3cefbb 100644
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml
@@ -365,11 +365,43 @@
${observatory_db_shadow_name}
observatory-post.sh
-
+
-
+
+
+ ${jobTracker}
+ ${nameNode}
+ copyDataToImpalaCluster.sh
+ ${external_stats_db_name}
+ ${stats_db_name}
+ ${monitor_db_name}
+ ${observatory_db_name}
+ copyDataToImpalaCluster.sh
+
+
+
+
+
+
+
+ ${jobTracker}
+ ${nameNode}
+ finalizeImpalaCluster.sh
+ ${stats_db_name}
+ ${stats_db_shadow_name}
+ ${monitor_db_name}
+ ${monitor_db_shadow_name}
+ ${observatory_db_name}
+ ${observatory_db_shadow_name}
+ finalizeImpalaCluster.sh
+
+
+
+
+
+
${jobTracker}
${nameNode}