diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/finalizedb.sh b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/finalizedb.sh
new file mode 100644
index 000000000..57acb2ee7
--- /dev/null
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/finalizedb.sh
@@ -0,0 +1,26 @@
+# Refreshes Impala metadata and table statistics for the SOURCE database and rebuilds
+# the SHADOW database as one view per SOURCE table.
+# Usage: finalizedb.sh SOURCE SHADOW
+
+# Ensure the /tmp link for the impala-shell python egg cache exists and is a symlink.
+export PYTHON_EGG_CACHE=/home/$(whoami)/.python-eggs
+export link_folder=/tmp/impala-shell-python-egg-cache-$(whoami)
+if ! [ -L "$link_folder" ]
+then
+ rm -Rf "$link_folder"
+ ln -sfn "${PYTHON_EGG_CACHE}${link_folder}" "${link_folder}"
+fi
+
+# Abort on a missing argument rather than running statements with empty database names.
+export SOURCE="${1:?usage: finalizedb.sh SOURCE SHADOW}"
+export SHADOW="${2:?usage: finalizedb.sh SOURCE SHADOW}"
+
+echo "Updating shadow database"
+impala-shell -d "${SOURCE}" -q "invalidate metadata"
+# One compute stats statement per SOURCE table, -c makes impala-shell carry on after a failed statement.
+impala-shell -d "${SOURCE}" -q "show tables" --delimited | sed "s/^\(.*\)/compute stats ${SOURCE}.\1;/" | impala-shell -c -f -
+impala-shell -q "create database if not exists ${SHADOW}"
+# Drop every object currently listed in SHADOW, then create one view per SOURCE table.
+impala-shell -d "${SHADOW}" -q "show tables" --delimited | sed "s/^/drop view if exists ${SHADOW}./" | sed "s/$/;/" | impala-shell -c -f -
+impala-shell -d "${SOURCE}" -q "show tables" --delimited | sed "s/\(.*\)/create view ${SHADOW}.\1 as select * from ${SOURCE}.\1;/" | impala-shell -c -f -
+echo "Shadow db ready!"
\ No newline at end of file
diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/impala-shell.sh b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/impala-shell.sh
deleted file mode 100644
index 70112dc7b..000000000
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/impala-shell.sh
+++ /dev/null
@@ -1,18 +0,0 @@
-export PYTHON_EGG_CACHE=/home/$(whoami)/.python-eggs
-export link_folder=/tmp/impala-shell-python-egg-cache-$(whoami)
-if ! [ -L $link_folder ]
-then
- rm -Rf "$link_folder"
- ln -sfn ${PYTHON_EGG_CACHE}${link_folder} ${link_folder}
-fi
-
-echo "Getting file from " $3
-hdfs dfs -copyToLocal $3
-
-echo "Running impala shell make the new database visible"
-impala-shell -q "INVALIDATE METADATA;"
-
-echo "Running impala shell to compute new table stats"
-impala-shell -d $1 -f $2
-echo "Impala shell finished"
-rm $2
diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/monitor.sh b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/monitor.sh
new file mode 100644
index 000000000..c5bda6d39
--- /dev/null
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/monitor.sh
@@ -0,0 +1,35 @@
+# Runs step20-createMonitorDB.sql through impala-shell with the SOURCE and TARGET
+# placeholders replaced by the first two arguments, then rebuilds the SHADOW
+# database as one view per TARGET table.
+# Usage: monitor.sh SOURCE TARGET SHADOW SCRIPT_PATH
+
+# Ensure the /tmp link for the impala-shell python egg cache exists and is a symlink.
+export PYTHON_EGG_CACHE=/home/$(whoami)/.python-eggs
+export link_folder=/tmp/impala-shell-python-egg-cache-$(whoami)
+if ! [ -L "$link_folder" ]
+then
+ rm -Rf "$link_folder"
+ ln -sfn "${PYTHON_EGG_CACHE}${link_folder}" "${link_folder}"
+fi
+
+# Abort on a missing argument rather than running statements with empty names.
+export SOURCE="${1:?usage: monitor.sh SOURCE TARGET SHADOW SCRIPT_PATH}"
+export TARGET="${2:?usage: monitor.sh SOURCE TARGET SHADOW SCRIPT_PATH}"
+export SHADOW="${3:?usage: monitor.sh SOURCE TARGET SHADOW SCRIPT_PATH}"
+export SCRIPT_PATH="${4:?usage: monitor.sh SOURCE TARGET SHADOW SCRIPT_PATH}"
+
+echo "Getting file from " "${SCRIPT_PATH}"
+# Presumably SCRIPT_PATH is the HDFS copy of step20-createMonitorDB.sql - confirm against the workflow.
+hdfs dfs -copyToLocal "${SCRIPT_PATH}"
+
+echo "Creating monitor database"
+# Plain g flag on both substitutions: the former g1 combination is not accepted by every sed.
+sed -e "s/SOURCE/${SOURCE}/g" -e "s/TARGET/${TARGET}/g" step20-createMonitorDB.sql | impala-shell -f -
+echo "Impala shell finished"
+
+echo "Updating shadow monitor database"
+impala-shell -q "create database if not exists ${SHADOW}"
+# Drop every object currently listed in SHADOW, then create one view per TARGET table.
+impala-shell -d "${SHADOW}" -q "show tables" --delimited | sed "s/^/drop view if exists ${SHADOW}./" | sed "s/$/;/" | impala-shell -f -
+impala-shell -d "${TARGET}" -q "show tables" --delimited | sed "s/\(.*\)/create view ${SHADOW}.\1 as select * from ${TARGET}.\1;/" | impala-shell -f -
+echo "Shadow db ready!"
\ No newline at end of file
diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step10.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step10.sql
index 46ff295f4..6c96317e6 100644
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step10.sql
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step10.sql
@@ -3,14 +3,37 @@
-- Tables/views from external tables/views (Fundref, Country, CountyGDP, roarmap, rndexpediture)
------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------
-CREATE OR REPLACE VIEW ${stats_db_name}.fundref AS SELECT * FROM ${external_stats_db_name}.fundref;
-CREATE OR REPLACE VIEW ${stats_db_name}.country AS SELECT * FROM ${external_stats_db_name}.country;
-CREATE OR REPLACE VIEW ${stats_db_name}.countrygdp AS SELECT * FROM ${external_stats_db_name}.countrygdp;
-CREATE OR REPLACE VIEW ${stats_db_name}.roarmap AS SELECT * FROM ${external_stats_db_name}.roarmap;
-CREATE OR REPLACE VIEW ${stats_db_name}.rndexpediture AS SELECT * FROM ${external_stats_db_name}.rndexpediture;
-CREATE OR REPLACE VIEW ${stats_db_name}.context AS SELECT * FROM ${external_stats_db_name}.context;
-CREATE OR REPLACE VIEW ${stats_db_name}.category AS SELECT * FROM ${external_stats_db_name}.category;
-CREATE OR REPLACE VIEW ${stats_db_name}.concept AS SELECT * FROM ${external_stats_db_name}.concept;
+CREATE OR REPLACE VIEW ${stats_db_name}.fundref AS
+SELECT *
+FROM ${external_stats_db_name}.fundref;
+
+CREATE OR REPLACE VIEW ${stats_db_name}.country AS
+SELECT *
+FROM ${external_stats_db_name}.country;
+
+CREATE OR REPLACE VIEW ${stats_db_name}.countrygdp AS
+SELECT *
+FROM ${external_stats_db_name}.countrygdp;
+
+CREATE OR REPLACE VIEW ${stats_db_name}.roarmap AS
+SELECT *
+FROM ${external_stats_db_name}.roarmap;
+
+CREATE OR REPLACE VIEW ${stats_db_name}.rndexpediture AS
+SELECT *
+FROM ${external_stats_db_name}.rndexpediture;
+
+CREATE OR REPLACE VIEW ${stats_db_name}.context AS
+SELECT *
+FROM ${external_stats_db_name}.context;
+
+CREATE OR REPLACE VIEW ${stats_db_name}.category AS
+SELECT *
+FROM ${external_stats_db_name}.category;
+
+CREATE OR REPLACE VIEW ${stats_db_name}.concept AS
+SELECT *
+FROM ${external_stats_db_name}.concept;
------------------------------------------------------------------------------------------------
@@ -18,4 +41,8 @@ CREATE OR REPLACE VIEW ${stats_db_name}.concept AS SELECT * FROM ${external_sta
-- Creation date of the database
------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------
-create table ${stats_db_name}.creation_date as select date_format(current_date(), 'dd-MM-yyyy') as date;
\ No newline at end of file
+create table ${stats_db_name}.creation_date as
+select date_format(current_date(), 'dd-MM-yyyy') as date;
+--
+-- ANALYZE TABLE ${stats_db_name}.creation_date COMPUTE STATISTICS;
+-- ANALYZE TABLE ${stats_db_name}.creation_date COMPUTE STATISTICS FOR COLUMNS;
\ No newline at end of file
diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step11.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step11.sql
index 13e141459..d26169fd6 100644
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step11.sql
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step11.sql
@@ -5,40 +5,119 @@
----------------------------------------------------------------
--Datasource temporary table updates
-UPDATE ${stats_db_name}.datasource_tmp SET harvested='true' WHERE datasource_tmp.id IN (SELECT DISTINCT d.id FROM ${stats_db_name}.datasource_tmp d, ${stats_db_name}.result_datasources rd WHERE d.id=rd.datasource);
+UPDATE ${stats_db_name}.datasource_tmp
+SET harvested='true'
+WHERE datasource_tmp.id IN (SELECT DISTINCT d.id
+ FROM ${stats_db_name}.datasource_tmp d,
+ ${stats_db_name}.result_datasources rd
+ WHERE d.id = rd.datasource);
-- Project temporary table update and final project table creation with final updates that can not be applied to ORC tables
-UPDATE ${stats_db_name}.project_tmp SET haspubs='yes' WHERE project_tmp.id IN (SELECT pr.id FROM ${stats_db_name}.project_results pr, ${stats_db_name}.result r WHERE pr.result=r.id AND r.type='publication');
+UPDATE ${stats_db_name}.project_tmp
+SET haspubs='yes'
+WHERE project_tmp.id IN (SELECT pr.id
+ FROM ${stats_db_name}.project_results pr,
+ ${stats_db_name}.result r
+ WHERE pr.result = r.id
+ AND r.type = 'publication');
-DROP TABLE IF EXISTS ${stats_db_name}.project;
CREATE TABLE ${stats_db_name}.project stored as parquet as
-SELECT p.id , p.acronym, p.title, p.funder, p.funding_lvl0, p.funding_lvl1, p.funding_lvl2, p.ec39, p.type, p.startdate, p.enddate, p.start_year, p.end_year, p.duration,
-CASE WHEN prr1.id IS NULL THEN 'no' ELSE 'yes' END AS haspubs,
-CASE WHEN prr1.id IS NULL THEN 0 ELSE prr1.np END AS numpubs,
-CASE WHEN prr2.id IS NULL THEN 0 ELSE prr2.daysForlastPub END AS daysforlastpub,
-CASE WHEN prr2.id IS NULL THEN 0 ELSE prr2.dp END AS delayedpubs,
-p.callidentifier, p.code
-FROM ${stats_db_name}.project_tmp p
-LEFT JOIN (SELECT pr.id, count(distinct pr.result) AS np
- FROM ${stats_db_name}.project_results pr INNER JOIN ${stats_db_name}.result r ON pr.result=r.id
- WHERE r.type='publication'
- GROUP BY pr.id) AS prr1 on prr1.id = p.id
-LEFT JOIN (SELECT pp.id, max(datediff(to_date(r.date), to_date(pp.enddate)) ) AS daysForlastPub , count(distinct r.id) AS dp
- FROM ${stats_db_name}.project_tmp pp, ${stats_db_name}.project_results pr, ${stats_db_name}.result r
- WHERE pp.id=pr.id AND pr.result=r.id AND r.type='publication' AND datediff(to_date(r.date), to_date(pp.enddate)) > 0
- GROUP BY pp.id) AS prr2
- ON prr2.id = p.id;
-
--- Publication temporary table updates
-UPDATE ${stats_db_name}.publication_tmp SET delayed = 'yes' WHERE publication_tmp.id IN (SELECT distinct r.id FROM stats_wf_db_obs.result r, ${stats_db_name}.project_results pr, ${stats_db_name}.project_tmp p WHERE r.id=pr.result AND pr.id=p.id AND to_date(r.date)-to_date(p.enddate) > 0);
+SELECT p.id,
+ p.acronym,
+ p.title,
+ p.funder,
+ p.funding_lvl0,
+ p.funding_lvl1,
+ p.funding_lvl2,
+ p.ec39,
+ p.type,
+ p.startdate,
+ p.enddate,
+ p.start_year,
+ p.end_year,
+ p.duration,
+ CASE WHEN prr1.id IS NULL THEN 'no' ELSE 'yes' END AS haspubs,
+ CASE WHEN prr1.id IS NULL THEN 0 ELSE prr1.np END AS numpubs,
+ CASE WHEN prr2.id IS NULL THEN 0 ELSE prr2.daysForlastPub END AS daysforlastpub,
+ CASE WHEN prr2.id IS NULL THEN 0 ELSE prr2.dp END AS delayedpubs,
+ p.callidentifier,
+ p.code
+FROM ${stats_db_name}.project_tmp p
+ LEFT JOIN (SELECT pr.id, count(distinct pr.result) AS np
+ FROM ${stats_db_name}.project_results pr
+ INNER JOIN ${stats_db_name}.result r ON pr.result = r.id
+ WHERE r.type = 'publication'
+ GROUP BY pr.id) AS prr1 on prr1.id = p.id
+ LEFT JOIN (SELECT pp.id,
+ max(datediff(to_date(r.date), to_date(pp.enddate))) AS daysForlastPub,
+ count(distinct r.id) AS dp
+ FROM ${stats_db_name}.project_tmp pp,
+ ${stats_db_name}.project_results pr,
+ ${stats_db_name}.result r
+ WHERE pp.id = pr.id
+ AND pr.result = r.id
+ AND r.type = 'publication'
+ AND datediff(to_date(r.date), to_date(pp.enddate)) > 0
+ GROUP BY pp.id) AS prr2
+ ON prr2.id = p.id;
--- Dataset temporary table updates
-UPDATE ${stats_db_name}.dataset_tmp SET delayed = 'yes' WHERE dataset_tmp.id IN (SELECT distinct r.id FROM stats_wf_db_obs.result r, ${stats_db_name}.project_results pr, ${stats_db_name}.project_tmp p WHERE r.id=pr.result AND pr.id=p.id AND to_date(r.date)-to_date(p.enddate) > 0);
+-- Publication temporary table updates
+-- NOTE(review): this and the three updates below subtract dates directly, whereas delayedpubs above uses datediff(); confirm the two agree.
+UPDATE ${stats_db_name}.publication_tmp
+SET delayed = 'yes'
+WHERE publication_tmp.id IN (SELECT distinct r.id
+ FROM ${stats_db_name}.result r,
+ ${stats_db_name}.project_results pr,
+ ${stats_db_name}.project_tmp p
+ WHERE r.id = pr.result
+ AND pr.id = p.id
+ AND to_date(r.date) - to_date(p.enddate) > 0);
--- Software temporary table updates
-UPDATE ${stats_db_name}.software_tmp SET delayed = 'yes' WHERE software_tmp.id IN (SELECT distinct r.id FROM ${stats_db_name}.result r, ${stats_db_name}.project_results pr, ${stats_db_name}.project_tmp p WHERE r.id=pr.result AND pr.id=p.id AND to_date(r.date)-to_date(p.enddate) > 0);
+-- Dataset temporary table updates
+UPDATE ${stats_db_name}.dataset_tmp
+SET delayed = 'yes'
+WHERE dataset_tmp.id IN (SELECT distinct r.id
+ FROM ${stats_db_name}.result r,
+ ${stats_db_name}.project_results pr,
+ ${stats_db_name}.project_tmp p
+ WHERE r.id = pr.result
+ AND pr.id = p.id
+ AND to_date(r.date) - to_date(p.enddate) > 0);
--- Oherresearchproduct temporary table updates
-UPDATE ${stats_db_name}.otherresearchproduct_tmp SET delayed = 'yes' WHERE otherresearchproduct_tmp.id IN (SELECT distinct r.id FROM ${stats_db_name}.result r, ${stats_db_name}.project_results pr, ${stats_db_name}.project_tmp p WHERE r.id=pr.result AND pr.id=p.id AND to_date(r.date)-to_date(p.enddate) > 0);
+-- Software temporary table updates
+UPDATE ${stats_db_name}.software_tmp
+SET delayed = 'yes'
+WHERE software_tmp.id IN (SELECT distinct r.id
+ FROM ${stats_db_name}.result r,
+ ${stats_db_name}.project_results pr,
+ ${stats_db_name}.project_tmp p
+ WHERE r.id = pr.result
+ AND pr.id = p.id
+ AND to_date(r.date) - to_date(p.enddate) > 0);
-CREATE OR REPLACE VIEW ${stats_db_name}.project_results_publication AS SELECT result_projects.id AS result, result_projects.project AS project_results, result.date as resultdate, project.enddate as projectenddate, result_projects.daysfromend AS daysfromend FROM ${stats_db_name}.result_projects, ${stats_db_name}.result, ${stats_db_name}.project WHERE result_projects.id=result.id AND result.type='publication' AND project.id=result_projects.project;
+-- Otherresearchproduct temporary table updates
+UPDATE ${stats_db_name}.otherresearchproduct_tmp
+SET delayed = 'yes'
+WHERE otherresearchproduct_tmp.id IN (SELECT distinct r.id
+ FROM ${stats_db_name}.result r,
+ ${stats_db_name}.project_results pr,
+ ${stats_db_name}.project_tmp p
+ WHERE r.id = pr.result
+ AND pr.id = p.id
+ AND to_date(r.date) - to_date(p.enddate) > 0);
+
+CREATE OR REPLACE VIEW ${stats_db_name}.project_results_publication AS
+SELECT result_projects.id AS result,
+ result_projects.project AS project_results,
+ result.date as resultdate,
+ project.enddate as projectenddate,
+ result_projects.daysfromend AS daysfromend
+FROM ${stats_db_name}.result_projects,
+ ${stats_db_name}.result,
+ ${stats_db_name}.project
+WHERE result_projects.id = result.id
+ AND result.type = 'publication'
+ AND project.id = result_projects.project;
+
+-- ANALYZE TABLE ${stats_db_name}.project COMPUTE STATISTICS;
+-- ANALYZE TABLE ${stats_db_name}.project COMPUTE STATISTICS FOR COLUMNS;
\ No newline at end of file
diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step12.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step12.sql
index 25439852e..51d3a73c9 100644
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step12.sql
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step12.sql
@@ -1,21 +1,25 @@
------------------------------------------------------------------------------------------------------
-- Creating parquet tables from the updated temporary tables and removing unnecessary temporary tables
------------------------------------------------------------------------------------------------------
+CREATE TABLE ${stats_db_name}.datasource stored AS parquet AS
+SELECT *
+FROM ${stats_db_name}.datasource_tmp;
-DROP TABLE IF EXISTS ${stats_db_name}.datasource;
-CREATE TABLE ${stats_db_name}.datasource stored AS parquet AS SELECT * FROM ${stats_db_name}.datasource_tmp;
+CREATE TABLE ${stats_db_name}.publication stored AS parquet AS
+SELECT *
+FROM ${stats_db_name}.publication_tmp;
-DROP TABLE IF EXISTS ${stats_db_name}.publication;
-CREATE TABLE ${stats_db_name}.publication stored AS parquet AS SELECT * FROM ${stats_db_name}.publication_tmp;
+CREATE TABLE ${stats_db_name}.dataset stored AS parquet AS
+SELECT *
+FROM ${stats_db_name}.dataset_tmp;
-DROP TABLE IF EXISTS ${stats_db_name}.dataset;
-CREATE TABLE ${stats_db_name}.dataset stored AS parquet AS SELECT * FROM ${stats_db_name}.dataset_tmp;
+CREATE TABLE ${stats_db_name}.software stored AS parquet AS
+SELECT *
+FROM ${stats_db_name}.software_tmp;
-DROP TABLE IF EXISTS ${stats_db_name}.software;
-CREATE TABLE ${stats_db_name}.software stored AS parquet AS SELECT * FROM ${stats_db_name}.software_tmp;
-
-DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct;
-CREATE TABLE ${stats_db_name}.otherresearchproduct stored AS parquet AS SELECT * FROM ${stats_db_name}.otherresearchproduct_tmp;
+CREATE TABLE ${stats_db_name}.otherresearchproduct stored AS parquet AS
+SELECT *
+FROM ${stats_db_name}.otherresearchproduct_tmp;
DROP TABLE ${stats_db_name}.project_tmp;
DROP TABLE ${stats_db_name}.datasource_tmp;
@@ -29,10 +33,47 @@ DROP TABLE ${stats_db_name}.otherresearchproduct_tmp;
---------------------------------------------
-- Result
-CREATE OR REPLACE VIEW ${stats_db_name}.result AS SELECT *, bestlicence AS access_mode FROM ${stats_db_name}.publication UNION ALL SELECT *, bestlicence as access_mode FROM ${stats_db_name}.software UNION ALL SELECT *, bestlicence AS access_mode FROM ${stats_db_name}.dataset UNION ALL SELECT *, bestlicence AS access_mode FROM ${stats_db_name}.otherresearchproduct;
+CREATE OR REPLACE VIEW ${stats_db_name}.result AS
+SELECT *, bestlicence AS access_mode
+FROM ${stats_db_name}.publication
+UNION ALL
+SELECT *, bestlicence as access_mode
+FROM ${stats_db_name}.software
+UNION ALL
+SELECT *, bestlicence AS access_mode
+FROM ${stats_db_name}.dataset
+UNION ALL
+SELECT *, bestlicence AS access_mode
+FROM ${stats_db_name}.otherresearchproduct;
-------------------------------------------------------------------------------
-- To see with Antonis if the following is needed and where it should be placed
-------------------------------------------------------------------------------
-CREATE TABLE ${stats_db_name}.numbers_country AS SELECT org.country AS country, count(distinct rd.datasource) AS datasources, count(distinct r.id) AS publications FROM ${stats_db_name}.result r, ${stats_db_name}.result_datasources rd, ${stats_db_name}.datasource d, ${stats_db_name}.datasource_organizations dor, ${stats_db_name}.organization org WHERE r.id=rd.id AND rd.datasource=d.id AND d.id=dor.id AND dor.organization=org.id AND r.type='publication' AND r.bestlicence='Open Access' GROUP BY org.country;
+CREATE TABLE ${stats_db_name}.numbers_country AS
+SELECT org.country AS country, count(distinct rd.datasource) AS datasources, count(distinct r.id) AS publications
+FROM ${stats_db_name}.result r,
+ ${stats_db_name}.result_datasources rd,
+ ${stats_db_name}.datasource d,
+ ${stats_db_name}.datasource_organizations dor,
+ ${stats_db_name}.organization org
+WHERE r.id = rd.id
+ AND rd.datasource = d.id
+ AND d.id = dor.id
+ AND dor.organization = org.id
+ AND r.type = 'publication'
+ AND r.bestlicence = 'Open Access'
+GROUP BY org.country;
+
+-- ANALYZE TABLE ${stats_db_name}.datasource COMPUTE STATISTICS;
+-- ANALYZE TABLE ${stats_db_name}.datasource COMPUTE STATISTICS FOR COLUMNS;
+-- ANALYZE TABLE ${stats_db_name}.publication COMPUTE STATISTICS;
+-- ANALYZE TABLE ${stats_db_name}.publication COMPUTE STATISTICS FOR COLUMNS;
+-- ANALYZE TABLE ${stats_db_name}.dataset COMPUTE STATISTICS;
+-- ANALYZE TABLE ${stats_db_name}.dataset COMPUTE STATISTICS FOR COLUMNS;
+-- ANALYZE TABLE ${stats_db_name}.software COMPUTE STATISTICS;
+-- ANALYZE TABLE ${stats_db_name}.software COMPUTE STATISTICS FOR COLUMNS;
+-- ANALYZE TABLE ${stats_db_name}.otherresearchproduct COMPUTE STATISTICS;
+-- ANALYZE TABLE ${stats_db_name}.otherresearchproduct COMPUTE STATISTICS FOR COLUMNS;
+-- ANALYZE TABLE ${stats_db_name}.numbers_country COMPUTE STATISTICS;
+-- ANALYZE TABLE ${stats_db_name}.numbers_country COMPUTE STATISTICS FOR COLUMNS;
\ No newline at end of file
diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step13.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step13.sql
index 795770313..d79396b3b 100644
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step13.sql
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step13.sql
@@ -57,3 +57,12 @@ UNION ALL
SELECT * FROM ${stats_db_name}.software_sources
UNION ALL
SELECT * FROM ${stats_db_name}.otherresearchproduct_sources;
+--
+-- ANALYZE TABLE ${stats_db_name}.publication_sources COMPUTE STATISTICS;
+-- ANALYZE TABLE ${stats_db_name}.publication_sources COMPUTE STATISTICS FOR COLUMNS;
+-- ANALYZE TABLE ${stats_db_name}.dataset_sources COMPUTE STATISTICS;
+-- ANALYZE TABLE ${stats_db_name}.dataset_sources COMPUTE STATISTICS FOR COLUMNS;
+-- ANALYZE TABLE ${stats_db_name}.software_sources COMPUTE STATISTICS;
+-- ANALYZE TABLE ${stats_db_name}.software_sources COMPUTE STATISTICS FOR COLUMNS;
+-- ANALYZE TABLE ${stats_db_name}.otherresearchproduct_sources COMPUTE STATISTICS;
+-- ANALYZE TABLE ${stats_db_name}.otherresearchproduct_sources COMPUTE STATISTICS FOR COLUMNS;
\ No newline at end of file
diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step14.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step14.sql
index 4a56b5d68..00a6913bc 100644
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step14.sql
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step14.sql
@@ -47,3 +47,16 @@ FROM (
SELECT substr(d.id, 4) id
from ${openaire_db_name}.datasource d
WHERE d.datainfo.deletedbyinference=false) d on o.datasource = d.id;
+
+-- ANALYZE TABLE ${stats_db_name}.publication_licenses COMPUTE STATISTICS;
+-- ANALYZE TABLE ${stats_db_name}.publication_licenses COMPUTE STATISTICS FOR COLUMNS;
+-- ANALYZE TABLE ${stats_db_name}.dataset_licenses COMPUTE STATISTICS;
+-- ANALYZE TABLE ${stats_db_name}.dataset_licenses COMPUTE STATISTICS FOR COLUMNS;
+-- ANALYZE TABLE ${stats_db_name}.software_licenses COMPUTE STATISTICS;
+-- ANALYZE TABLE ${stats_db_name}.software_licenses COMPUTE STATISTICS FOR COLUMNS;
+-- ANALYZE TABLE ${stats_db_name}.otherresearchproduct_licenses COMPUTE STATISTICS;
+-- ANALYZE TABLE ${stats_db_name}.otherresearchproduct_licenses COMPUTE STATISTICS FOR COLUMNS;
+-- ANALYZE TABLE ${stats_db_name}.organization_pids COMPUTE STATISTICS;
+-- ANALYZE TABLE ${stats_db_name}.organization_pids COMPUTE STATISTICS FOR COLUMNS;
+-- ANALYZE TABLE ${stats_db_name}.organization_sources COMPUTE STATISTICS;
+-- ANALYZE TABLE ${stats_db_name}.organization_sources COMPUTE STATISTICS FOR COLUMNS;
\ No newline at end of file
diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15.sql
index 60b37048b..8f364d747 100644
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15.sql
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15.sql
@@ -34,3 +34,12 @@ union all
select * from ${stats_db_name}.software_refereed
union all
select * from ${stats_db_name}.otherresearchproduct_refereed;
+--
+-- ANALYZE TABLE ${stats_db_name}.publication_refereed COMPUTE STATISTICS;
+-- ANALYZE TABLE ${stats_db_name}.publication_refereed COMPUTE STATISTICS FOR COLUMNS;
+-- ANALYZE TABLE ${stats_db_name}.dataset_refereed COMPUTE STATISTICS;
+-- ANALYZE TABLE ${stats_db_name}.dataset_refereed COMPUTE STATISTICS FOR COLUMNS;
+-- ANALYZE TABLE ${stats_db_name}.software_refereed COMPUTE STATISTICS;
+-- ANALYZE TABLE ${stats_db_name}.software_refereed COMPUTE STATISTICS FOR COLUMNS;
+-- ANALYZE TABLE ${stats_db_name}.otherresearchproduct_refereed COMPUTE STATISTICS;
+-- ANALYZE TABLE ${stats_db_name}.otherresearchproduct_refereed COMPUTE STATISTICS FOR COLUMNS;
\ No newline at end of file
diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16.sql
index 33849b960..833deff73 100644
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16.sql
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16.sql
@@ -77,4 +77,15 @@ join ${stats_db_name}.result_datasources rd on rd.id=r.id
join ${stats_db_name}.datasource d on d.id=rd.datasource
join ${stats_db_name}.datasource_organizations dor on dor.id=d.id
join ${stats_db_name}.organization o on o.id=dor.organization
-where o.country is not null and o.country!='';
\ No newline at end of file
+where o.country is not null and o.country!='';
+
+-- ANALYZE TABLE ${stats_db_name}.result_peerreviewed COMPUTE STATISTICS;
+-- ANALYZE TABLE ${stats_db_name}.result_peerreviewed COMPUTE STATISTICS FOR COLUMNS;
+-- ANALYZE TABLE ${stats_db_name}.result_greenoa COMPUTE STATISTICS;
+-- ANALYZE TABLE ${stats_db_name}.result_greenoa COMPUTE STATISTICS FOR COLUMNS;
+-- ANALYZE TABLE ${stats_db_name}.result_gold COMPUTE STATISTICS;
+-- ANALYZE TABLE ${stats_db_name}.result_gold COMPUTE STATISTICS FOR COLUMNS;
+-- ANALYZE TABLE ${stats_db_name}.result_affiliated_country COMPUTE STATISTICS;
+-- ANALYZE TABLE ${stats_db_name}.result_affiliated_country COMPUTE STATISTICS FOR COLUMNS;
+-- ANALYZE TABLE ${stats_db_name}.result_deposited_country COMPUTE STATISTICS;
+-- ANALYZE TABLE ${stats_db_name}.result_deposited_country COMPUTE STATISTICS FOR COLUMNS;
\ No newline at end of file
diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_5.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_5.sql
index f737c1ea6..2bdc263ef 100644
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_5.sql
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_5.sql
@@ -52,4 +52,7 @@ LEFT OUTER JOIN ${stats_db_name}.result_gold gold on gold.id=r.id;
drop table if exists ${stats_db_name}.result;
drop view if exists ${stats_db_name}.result;
create table ${stats_db_name}.result stored as parquet as select * from ${stats_db_name}.result_tmp;
-drop table ${stats_db_name}.result_tmp;
\ No newline at end of file
+drop table ${stats_db_name}.result_tmp;
+--
+-- ANALYZE TABLE ${stats_db_name}.result COMPUTE STATISTICS;
+-- ANALYZE TABLE ${stats_db_name}.result COMPUTE STATISTICS FOR COLUMNS;
\ No newline at end of file
diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_6.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_6.sql
index ced7bbc11..528aaff52 100644
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_6.sql
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_6.sql
@@ -29,4 +29,11 @@ select rcount.pid, sum(case when rcount.type='publication' then rcount.count els
from rcount
group by rcount.pid;
-create view ${stats_db_name}.rndexpenditure as select * from stats_ext.rndexpediture
\ No newline at end of file
+create view ${stats_db_name}.rndexpenditure as select * from stats_ext.rndexpediture;
+--
+-- ANALYZE TABLE ${stats_db_name}.result_projectcount COMPUTE STATISTICS;
+-- ANALYZE TABLE ${stats_db_name}.result_projectcount COMPUTE STATISTICS FOR COLUMNS;
+-- ANALYZE TABLE ${stats_db_name}.result_fundercount COMPUTE STATISTICS;
+-- ANALYZE TABLE ${stats_db_name}.result_fundercount COMPUTE STATISTICS FOR COLUMNS;
+-- ANALYZE TABLE ${stats_db_name}.project_resultcount COMPUTE STATISTICS;
+-- ANALYZE TABLE ${stats_db_name}.project_resultcount COMPUTE STATISTICS FOR COLUMNS;
\ No newline at end of file
diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step17.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step17.sql
deleted file mode 100644
index 5c102d014..000000000
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step17.sql
+++ /dev/null
@@ -1,207 +0,0 @@
-------------------------------------------------------
-------------------------------------------------------
--- Shadow schema table exchange
-------------------------------------------------------
-------------------------------------------------------
-
--- Dropping old views
-DROP VIEW IF EXISTS ${stats_db_shadow_name}.category;
-DROP VIEW IF EXISTS ${stats_db_shadow_name}.concept;
-DROP VIEW IF EXISTS ${stats_db_shadow_name}.context;
-DROP VIEW IF EXISTS ${stats_db_shadow_name}.country;
-DROP VIEW IF EXISTS ${stats_db_shadow_name}.countrygdp;
-DROP VIEW IF EXISTS ${stats_db_shadow_name}.creation_date;
-DROP VIEW IF EXISTS ${stats_db_shadow_name}.dataset;
-DROP VIEW IF EXISTS ${stats_db_shadow_name}.dataset_citations;
-DROP VIEW IF EXISTS ${stats_db_shadow_name}.dataset_classifications;
-DROP VIEW IF EXISTS ${stats_db_shadow_name}.dataset_concepts;
-DROP VIEW IF EXISTS ${stats_db_shadow_name}.dataset_datasources;
-DROP VIEW IF EXISTS ${stats_db_shadow_name}.dataset_languages;
-DROP VIEW IF EXISTS ${stats_db_shadow_name}.dataset_licenses;
-DROP VIEW IF EXISTS ${stats_db_shadow_name}.dataset_oids;
-DROP VIEW IF EXISTS ${stats_db_shadow_name}.dataset_pids;
-DROP VIEW IF EXISTS ${stats_db_shadow_name}.dataset_refereed;
-DROP VIEW IF EXISTS ${stats_db_shadow_name}.dataset_sources;
-DROP VIEW IF EXISTS ${stats_db_shadow_name}.dataset_topics;
-DROP VIEW IF EXISTS ${stats_db_shadow_name}.datasource;
-DROP VIEW IF EXISTS ${stats_db_shadow_name}.datasource_languages;
-DROP VIEW IF EXISTS ${stats_db_shadow_name}.datasource_oids;
-DROP VIEW IF EXISTS ${stats_db_shadow_name}.datasource_organizations;
-DROP VIEW IF EXISTS ${stats_db_shadow_name}.datasource_results;
-DROP VIEW IF EXISTS ${stats_db_shadow_name}.datasource_sources;
-DROP VIEW IF EXISTS ${stats_db_shadow_name}.funder;
-DROP VIEW IF EXISTS ${stats_db_shadow_name}.fundref;
-DROP VIEW IF EXISTS ${stats_db_shadow_name}.numbers_country;
-DROP VIEW IF EXISTS ${stats_db_shadow_name}.organization;
-DROP VIEW IF EXISTS ${stats_db_shadow_name}.organization_datasources;
-DROP VIEW IF EXISTS ${stats_db_shadow_name}.organization_pids;
-DROP VIEW IF EXISTS ${stats_db_shadow_name}.organization_projects;
-DROP VIEW IF EXISTS ${stats_db_shadow_name}.organization_sources;
-DROP VIEW IF EXISTS ${stats_db_shadow_name}.otherresearchproduct;
-DROP VIEW IF EXISTS ${stats_db_shadow_name}.otherresearchproduct_citations;
-DROP VIEW IF EXISTS ${stats_db_shadow_name}.otherresearchproduct_classifications;
-DROP VIEW IF EXISTS ${stats_db_shadow_name}.otherresearchproduct_concepts;
-DROP VIEW IF EXISTS ${stats_db_shadow_name}.otherresearchproduct_datasources;
-DROP VIEW IF EXISTS ${stats_db_shadow_name}.otherresearchproduct_languages;
-DROP VIEW IF EXISTS ${stats_db_shadow_name}.otherresearchproduct_licenses;
-DROP VIEW IF EXISTS ${stats_db_shadow_name}.otherresearchproduct_oids;
-DROP VIEW IF EXISTS ${stats_db_shadow_name}.otherresearchproduct_pids;
-DROP VIEW IF EXISTS ${stats_db_shadow_name}.otherresearchproduct_refereed;
-DROP VIEW IF EXISTS ${stats_db_shadow_name}.otherresearchproduct_sources;
-DROP VIEW IF EXISTS ${stats_db_shadow_name}.otherresearchproduct_topics;
-DROP VIEW IF EXISTS ${stats_db_shadow_name}.project;
-DROP VIEW IF EXISTS ${stats_db_shadow_name}.project_oids;
-DROP VIEW IF EXISTS ${stats_db_shadow_name}.project_organizations;
-DROP VIEW IF EXISTS ${stats_db_shadow_name}.project_results;
-DROP VIEW IF EXISTS ${stats_db_shadow_name}.project_resultcount;
-DROP VIEW IF EXISTS ${stats_db_shadow_name}.project_results_publication;
-DROP VIEW IF EXISTS ${stats_db_shadow_name}.publication;
-DROP VIEW IF EXISTS ${stats_db_shadow_name}.publication_citations;
-DROP VIEW IF EXISTS ${stats_db_shadow_name}.publication_classifications;
-DROP VIEW IF EXISTS ${stats_db_shadow_name}.publication_concepts;
-DROP VIEW IF EXISTS ${stats_db_shadow_name}.publication_datasources;
-DROP VIEW IF EXISTS ${stats_db_shadow_name}.publication_languages;
-DROP VIEW IF EXISTS ${stats_db_shadow_name}.publication_licenses;
-DROP VIEW IF EXISTS ${stats_db_shadow_name}.publication_oids;
-DROP VIEW IF EXISTS ${stats_db_shadow_name}.publication_pids;
-DROP VIEW IF EXISTS ${stats_db_shadow_name}.publication_refereed;
-DROP VIEW IF EXISTS ${stats_db_shadow_name}.publication_sources;
-DROP VIEW IF EXISTS ${stats_db_shadow_name}.publication_topics;
-DROP VIEW IF EXISTS ${stats_db_shadow_name}.result;
-DROP VIEW IF EXISTS ${stats_db_shadow_name}.result_affiliated_country;
-DROP VIEW IF EXISTS ${stats_db_shadow_name}.result_citations;
-DROP VIEW IF EXISTS ${stats_db_shadow_name}.result_classifications;
-DROP VIEW IF EXISTS ${stats_db_shadow_name}.result_concepts;
-DROP VIEW IF EXISTS ${stats_db_shadow_name}.result_datasources;
-DROP VIEW IF EXISTS ${stats_db_shadow_name}.result_deposited_country;
-DROP VIEW IF EXISTS ${stats_db_shadow_name}.result_fundercount;
-DROP VIEW IF EXISTS ${stats_db_shadow_name}.result_gold;
-DROP VIEW IF EXISTS ${stats_db_shadow_name}.result_greenoa;
-DROP VIEW IF EXISTS ${stats_db_shadow_name}.result_languages;
-DROP VIEW IF EXISTS ${stats_db_shadow_name}.result_licenses;
-DROP VIEW IF EXISTS ${stats_db_shadow_name}.result_oids;
-DROP VIEW IF EXISTS ${stats_db_shadow_name}.result_organization;
-DROP VIEW IF EXISTS ${stats_db_shadow_name}.result_peerreviewed;
-DROP VIEW IF EXISTS ${stats_db_shadow_name}.result_pids;
-DROP VIEW IF EXISTS ${stats_db_shadow_name}.result_projectcount;
-DROP VIEW IF EXISTS ${stats_db_shadow_name}.result_projects;
-DROP VIEW IF EXISTS ${stats_db_shadow_name}.result_refereed;
-DROP VIEW IF EXISTS ${stats_db_shadow_name}.result_sources;
-DROP VIEW IF EXISTS ${stats_db_shadow_name}.result_topics;
-DROP VIEW IF EXISTS ${stats_db_shadow_name}.rndexpediture;
-DROP VIEW IF EXISTS ${stats_db_shadow_name}.roarmap;
-DROP VIEW IF EXISTS ${stats_db_shadow_name}.software;
-DROP VIEW IF EXISTS ${stats_db_shadow_name}.software_citations;
-DROP VIEW IF EXISTS ${stats_db_shadow_name}.software_classifications;
-DROP VIEW IF EXISTS ${stats_db_shadow_name}.software_concepts;
-DROP VIEW IF EXISTS ${stats_db_shadow_name}.software_datasources;
-DROP VIEW IF EXISTS ${stats_db_shadow_name}.software_languages;
-DROP VIEW IF EXISTS ${stats_db_shadow_name}.software_licenses;
-DROP VIEW IF EXISTS ${stats_db_shadow_name}.software_oids;
-DROP VIEW IF EXISTS ${stats_db_shadow_name}.software_pids;
-DROP VIEW IF EXISTS ${stats_db_shadow_name}.software_refereed;
-DROP VIEW IF EXISTS ${stats_db_shadow_name}.software_sources;
-DROP VIEW IF EXISTS ${stats_db_shadow_name}.software_topics;
-
-
--- Creating the shadow database, in case it doesn't exist
-CREATE database IF NOT EXISTS ${stats_db_shadow_name};
-
--- Creating new views
-CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.category AS SELECT * FROM ${stats_db_name}.category;
-CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.concept AS SELECT * FROM ${stats_db_name}.concept;
-CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.context AS SELECT * FROM ${stats_db_name}.context;
-CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.country AS SELECT * FROM ${stats_db_name}.country;
-CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.countrygdp AS SELECT * FROM ${stats_db_name}.countrygdp;
-CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.creation_date AS SELECT * FROM ${stats_db_name}.creation_date;
-CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.dataset AS SELECT * FROM ${stats_db_name}.dataset;
-CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.dataset_citations AS SELECT * FROM ${stats_db_name}.dataset_citations;
-CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.dataset_classifications AS SELECT * FROM ${stats_db_name}.dataset_classifications;
-CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.dataset_concepts AS SELECT * FROM ${stats_db_name}.dataset_concepts;
-CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.dataset_datasources AS SELECT * FROM ${stats_db_name}.dataset_datasources;
-CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.dataset_languages AS SELECT * FROM ${stats_db_name}.dataset_languages;
-CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.dataset_licenses AS SELECT * FROM ${stats_db_name}.dataset_licenses;
-CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.dataset_oids AS SELECT * FROM ${stats_db_name}.dataset_oids;
-CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.dataset_pids AS SELECT * FROM ${stats_db_name}.dataset_pids;
-CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.dataset_refereed AS SELECT * FROM ${stats_db_name}.dataset_refereed;
-CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.dataset_sources AS SELECT * FROM ${stats_db_name}.dataset_sources;
-CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.dataset_topics AS SELECT * FROM ${stats_db_name}.dataset_topics;
-CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.datasource AS SELECT * FROM ${stats_db_name}.datasource;
-CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.datasource_languages AS SELECT * FROM ${stats_db_name}.datasource_languages;
-CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.datasource_oids AS SELECT * FROM ${stats_db_name}.datasource_oids;
-CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.datasource_organizations AS SELECT * FROM ${stats_db_name}.datasource_organizations;
-CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.datasource_results AS SELECT * FROM ${stats_db_name}.datasource_results;
-CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.datasource_sources AS SELECT * FROM ${stats_db_name}.datasource_sources;
-CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.funder AS SELECT * FROM ${stats_db_name}.funder;
-CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.fundref AS SELECT * FROM ${stats_db_name}.fundref;
-CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.numbers_country AS SELECT * FROM ${stats_db_name}.numbers_country;
-CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.organization AS SELECT * FROM ${stats_db_name}.organization;
-CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.organization_datasources AS SELECT * FROM ${stats_db_name}.organization_datasources;
-CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.organization_pids AS SELECT * FROM ${stats_db_name}.organization_pids;
-CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.organization_projects AS SELECT * FROM ${stats_db_name}.organization_projects;
-CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.organization_sources AS SELECT * FROM ${stats_db_name}.organization_sources;
-CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.otherresearchproduct AS SELECT * FROM ${stats_db_name}.otherresearchproduct;
-CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.otherresearchproduct_citations AS SELECT * FROM ${stats_db_name}.otherresearchproduct_citations;
-CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.otherresearchproduct_classifications AS SELECT * FROM ${stats_db_name}.otherresearchproduct_classifications;
-CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.otherresearchproduct_concepts AS SELECT * FROM ${stats_db_name}.otherresearchproduct_concepts;
-CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.otherresearchproduct_datasources AS SELECT * FROM ${stats_db_name}.otherresearchproduct_datasources;
-CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.otherresearchproduct_languages AS SELECT * FROM ${stats_db_name}.otherresearchproduct_languages;
-CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.otherresearchproduct_licenses AS SELECT * FROM ${stats_db_name}.otherresearchproduct_licenses;
-CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.otherresearchproduct_oids AS SELECT * FROM ${stats_db_name}.otherresearchproduct_oids;
-CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.otherresearchproduct_pids AS SELECT * FROM ${stats_db_name}.otherresearchproduct_pids;
-CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.otherresearchproduct_refereed AS SELECT * FROM ${stats_db_name}.otherresearchproduct_refereed;
-CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.otherresearchproduct_sources AS SELECT * FROM ${stats_db_name}.otherresearchproduct_sources;
-CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.otherresearchproduct_topics AS SELECT * FROM ${stats_db_name}.otherresearchproduct_topics;
-CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.project AS SELECT * FROM ${stats_db_name}.project;
-CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.project_oids AS SELECT * FROM ${stats_db_name}.project_oids;
-CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.project_organizations AS SELECT * FROM ${stats_db_name}.project_organizations;
-CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.project_results AS SELECT * FROM ${stats_db_name}.project_results;
-CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.project_resultcount AS SELECT * FROM ${stats_db_name}.project_resultcount;
-CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.project_results_publication AS SELECT * FROM ${stats_db_name}.project_results_publication;
-CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.publication AS SELECT * FROM ${stats_db_name}.publication;
-CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.publication_citations AS SELECT * FROM ${stats_db_name}.publication_citations;
-CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.publication_classifications AS SELECT * FROM ${stats_db_name}.publication_classifications;
-CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.publication_concepts AS SELECT * FROM ${stats_db_name}.publication_concepts;
-CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.publication_datasources AS SELECT * FROM ${stats_db_name}.publication_datasources;
-CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.publication_languages AS SELECT * FROM ${stats_db_name}.publication_languages;
-CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.publication_licenses AS SELECT * FROM ${stats_db_name}.publication_licenses;
-CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.publication_oids AS SELECT * FROM ${stats_db_name}.publication_oids;
-CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.publication_pids AS SELECT * FROM ${stats_db_name}.publication_pids;
-CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.publication_refereed AS SELECT * FROM ${stats_db_name}.publication_refereed;
-CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.publication_sources AS SELECT * FROM ${stats_db_name}.publication_sources;
-CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.publication_topics AS SELECT * FROM ${stats_db_name}.publication_topics;
-CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.result AS SELECT * FROM ${stats_db_name}.result;
-CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.result_affiliated_country AS SELECT * FROM ${stats_db_name}.result_affiliated_country;
-CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.result_citations AS SELECT * FROM ${stats_db_name}.result_citations;
-CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.result_classifications AS SELECT * FROM ${stats_db_name}.result_classifications;
-CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.result_concepts AS SELECT * FROM ${stats_db_name}.result_concepts;
-CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.result_datasources AS SELECT * FROM ${stats_db_name}.result_datasources;
-CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.result_deposited_country AS SELECT * FROM ${stats_db_name}.result_deposited_country;
-CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.result_fundercount AS SELECT * FROM ${stats_db_name}.result_fundercount;
-CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.result_gold AS SELECT * FROM ${stats_db_name}.result_gold;
-CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.result_greenoa AS SELECT * FROM ${stats_db_name}.result_greenoa;
-CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.result_languages AS SELECT * FROM ${stats_db_name}.result_languages;
-CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.result_licenses AS SELECT * FROM ${stats_db_name}.result_licenses;
-CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.result_oids AS SELECT * FROM ${stats_db_name}.result_oids;
-CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.result_organization AS SELECT * FROM ${stats_db_name}.result_organization;
-CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.result_peerreviewed AS SELECT * FROM ${stats_db_name}.result_peerreviewed;
-CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.result_pids AS SELECT * FROM ${stats_db_name}.result_pids;
-CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.result_projectcount AS SELECT * FROM ${stats_db_name}.result_projectcount;
-CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.result_projects AS SELECT * FROM ${stats_db_name}.result_projects;
-CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.result_refereed AS SELECT * FROM ${stats_db_name}.result_refereed;
-CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.result_sources AS SELECT * FROM ${stats_db_name}.result_sources;
-CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.result_topics AS SELECT * FROM ${stats_db_name}.result_topics;
-CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.rndexpediture AS SELECT * FROM ${stats_db_name}.rndexpediture;
-CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.roarmap AS SELECT * FROM ${stats_db_name}.roarmap;
-CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.software AS SELECT * FROM ${stats_db_name}.software;
-CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.software_citations AS SELECT * FROM ${stats_db_name}.software_citations;
-CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.software_classifications AS SELECT * FROM ${stats_db_name}.software_classifications;
-CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.software_concepts AS SELECT * FROM ${stats_db_name}.software_concepts;
-CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.software_datasources AS SELECT * FROM ${stats_db_name}.software_datasources;
-CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.software_languages AS SELECT * FROM ${stats_db_name}.software_languages;
-CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.software_licenses AS SELECT * FROM ${stats_db_name}.software_licenses;
-CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.software_oids AS SELECT * FROM ${stats_db_name}.software_oids;
-CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.software_pids AS SELECT * FROM ${stats_db_name}.software_pids;
-CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.software_refereed AS SELECT * FROM ${stats_db_name}.software_refereed;
-CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.software_sources AS SELECT * FROM ${stats_db_name}.software_sources;
-CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.software_topics AS SELECT * FROM ${stats_db_name}.software_topics;
diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step18.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step18.sql
deleted file mode 100644
index 34e48a18a..000000000
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step18.sql
+++ /dev/null
@@ -1,8 +0,0 @@
-------------------------------------------------------
-------------------------------------------------------
--- Impala table statistics - Needed to make the tables
--- visible for impala
-------------------------------------------------------
-------------------------------------------------------
-
-INVALIDATE METADATA ${stats_db_name};
diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step19.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step19.sql
deleted file mode 100644
index 34e48a18a..000000000
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step19.sql
+++ /dev/null
@@ -1,8 +0,0 @@
-------------------------------------------------------
-------------------------------------------------------
--- Impala table statistics - Needed to make the tables
--- visible for impala
-------------------------------------------------------
-------------------------------------------------------
-
-INVALIDATE METADATA ${stats_db_name};
diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step2.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step2.sql
index ba0db25be..62a158560 100644
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step2.sql
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step2.sql
@@ -5,40 +5,109 @@
--------------------------------------------------------------
-- Publication temporary table
-DROP TABLE IF EXISTS ${stats_db_name}.publication_tmp;
+CREATE TABLE ${stats_db_name}.publication_tmp
+(
+ id STRING,
+ title STRING,
+ publisher STRING,
+ journal STRING,
+ date STRING,
+ year STRING,
+ bestlicence STRING,
+ embargo_end_date STRING,
+ delayed BOOLEAN,
+ authors INT,
+ source STRING,
+ abstract BOOLEAN,
+ type STRING
+)
+ clustered by (id) into 100 buckets stored as orc tblproperties ('transactional' = 'true');
-CREATE TABLE ${stats_db_name}.publication_tmp (id STRING, title STRING, publisher STRING, journal STRING, date STRING, year STRING, bestlicence STRING, embargo_end_date STRING, delayed BOOLEAN, authors INT, source STRING, abstract BOOLEAN, type STRING ) clustered by (id) into 100 buckets stored as orc tblproperties('transactional'='true');
-
-INSERT INTO ${stats_db_name}.publication_tmp SELECT substr(p.id, 4) as id, p.title[0].value as title, p.publisher.value as publisher, p.journal.name as journal ,
-p.dateofacceptance.value as date, date_format(p.dateofacceptance.value,'yyyy') as year, p.bestaccessright.classname as bestlicence,
-p.embargoenddate.value as embargo_end_date, false as delayed, size(p.author) as authors , concat_ws('\u003B',p.source.value) as source,
-case when size(p.description) > 0 then true else false end as abstract,
-'publication' as type
+INSERT INTO ${stats_db_name}.publication_tmp
+SELECT substr(p.id, 4) as id,
+ p.title[0].value as title,
+ p.publisher.value as publisher,
+ p.journal.name as journal,
+ p.dateofacceptance.value as date,
+ date_format(p.dateofacceptance.value, 'yyyy') as year,
+ p.bestaccessright.classname as bestlicence,
+ p.embargoenddate.value as embargo_end_date,
+ false as delayed,
+ size(p.author) as authors,
+ concat_ws('\u003B', p.source.value) as source,
+ case when size(p.description) > 0 then true else false end as abstract,
+ 'publication' as type
from ${openaire_db_name}.publication p
-where p.datainfo.deletedbyinference=false;
+where p.datainfo.deletedbyinference = false;
-CREATE TABLE ${stats_db_name}.publication_classifications AS SELECT substr(p.id, 4) as id, instancetype.classname as type from ${openaire_db_name}.publication p LATERAL VIEW explode(p.instance.instancetype) instances as instancetype where p.datainfo.deletedbyinference=false;
+CREATE TABLE ${stats_db_name}.publication_classifications AS
+SELECT substr(p.id, 4) as id, instancetype.classname as type
+from ${openaire_db_name}.publication p
+ LATERAL VIEW explode(p.instance.instancetype) instances as instancetype
+where p.datainfo.deletedbyinference = false;
-CREATE TABLE ${stats_db_name}.publication_concepts AS SELECT substr(p.id, 4) as id, contexts.context.id as concept from ${openaire_db_name}.publication p LATERAL VIEW explode(p.context) contexts as context where p.datainfo.deletedbyinference=false;
+CREATE TABLE ${stats_db_name}.publication_concepts AS
+SELECT substr(p.id, 4) as id, contexts.context.id as concept
+from ${openaire_db_name}.publication p
+ LATERAL VIEW explode(p.context) contexts as context
+where p.datainfo.deletedbyinference = false;
CREATE TABLE ${stats_db_name}.publication_datasources as
SELECT p.id, case when d.id is null then 'other' else p.datasource end as datasource
- FROM (
- SELECT substr(p.id, 4) as id, substr(instances.instance.hostedby.key, 4) as datasource
- from ${openaire_db_name}.publication p lateral view explode(p.instance) instances as instance
- where p.datainfo.deletedbyinference=false ) p
- LEFT OUTER JOIN (
- SELECT substr(d.id, 4) id
- from ${openaire_db_name}.datasource d
- WHERE d.datainfo.deletedbyinference=false ) d on p.datasource = d.id;
+FROM (
+ SELECT substr(p.id, 4) as id, substr(instances.instance.hostedby.key, 4) as datasource
+ from ${openaire_db_name}.publication p lateral view explode(p.instance) instances as instance
+ where p.datainfo.deletedbyinference = false) p
+ LEFT OUTER JOIN (
+ SELECT substr(d.id, 4) id
+ from ${openaire_db_name}.datasource d
+ WHERE d.datainfo.deletedbyinference = false) d on p.datasource = d.id;
-CREATE TABLE ${stats_db_name}.publication_languages AS select substr(p.id, 4) as id, p.language.classname as language FROM ${openaire_db_name}.publication p where p.datainfo.deletedbyinference=false;
+CREATE TABLE ${stats_db_name}.publication_languages AS
+select substr(p.id, 4) as id, p.language.classname as language
+FROM ${openaire_db_name}.publication p
+where p.datainfo.deletedbyinference = false;
-CREATE TABLE ${stats_db_name}.publication_oids AS SELECT substr(p.id, 4) AS id, oids.ids AS oid FROM ${openaire_db_name}.publication p LATERAL VIEW explode(p.originalid) oids AS ids where p.datainfo.deletedbyinference=false;
+CREATE TABLE ${stats_db_name}.publication_oids AS
+SELECT substr(p.id, 4) AS id, oids.ids AS oid
+FROM ${openaire_db_name}.publication p
+ LATERAL VIEW explode(p.originalid) oids AS ids
+where p.datainfo.deletedbyinference = false;
-CREATE TABLE ${stats_db_name}.publication_pids AS SELECT substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value as pid FROM ${openaire_db_name}.publication p LATERAL VIEW explode(p.pid) pids AS ppid where p.datainfo.deletedbyinference=false;
+CREATE TABLE ${stats_db_name}.publication_pids AS
+SELECT substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value as pid
+FROM ${openaire_db_name}.publication p
+ LATERAL VIEW explode(p.pid) pids AS ppid
+where p.datainfo.deletedbyinference = false;
-CREATE TABLE ${stats_db_name}.publication_topics as select substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS TYPE, subjects.subject.value AS topic FROM ${openaire_db_name}.publication p LATERAL VIEW explode(p.subject) subjects AS subject where p.datainfo.deletedbyinference=false;
+CREATE TABLE ${stats_db_name}.publication_topics as
+select substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS TYPE, subjects.subject.value AS topic
+FROM ${openaire_db_name}.publication p
+ LATERAL VIEW explode(p.subject) subjects AS subject
+where p.datainfo.deletedbyinference = false;
--- Publication_citations
-CREATE TABLE ${stats_db_name}.publication_citations AS SELECT substr(p.id, 4) AS id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS result FROM ${openaire_db_name}.publication p lateral view explode(p.extrainfo) citations AS citation WHERE xpath_string(citation.value, "//citation/id[@type='openaire']/@value") !="" and p.datainfo.deletedbyinference=false;
\ No newline at end of file
+CREATE TABLE ${stats_db_name}.publication_citations AS
+SELECT substr(p.id, 4) AS id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS result
+FROM ${openaire_db_name}.publication p
+ lateral view explode(p.extrainfo) citations AS citation
+WHERE xpath_string(citation.value, "//citation/id[@type='openaire']/@value") != ""
+ and p.datainfo.deletedbyinference = false;
+
+-- ANALYZE TABLE ${stats_db_name}.publication_tmp COMPUTE STATISTICS;
+-- ANALYZE TABLE ${stats_db_name}.publication_tmp COMPUTE STATISTICS FOR COLUMNS;
+-- ANALYZE TABLE ${stats_db_name}.publication_classifications COMPUTE STATISTICS;
+-- ANALYZE TABLE ${stats_db_name}.publication_classifications COMPUTE STATISTICS FOR COLUMNS;
+-- ANALYZE TABLE ${stats_db_name}.publication_concepts COMPUTE STATISTICS;
+-- ANALYZE TABLE ${stats_db_name}.publication_concepts COMPUTE STATISTICS FOR COLUMNS;
+-- ANALYZE TABLE ${stats_db_name}.publication_datasources COMPUTE STATISTICS;
+-- ANALYZE TABLE ${stats_db_name}.publication_datasources COMPUTE STATISTICS FOR COLUMNS;
+-- ANALYZE TABLE ${stats_db_name}.publication_languages COMPUTE STATISTICS;
+-- ANALYZE TABLE ${stats_db_name}.publication_languages COMPUTE STATISTICS FOR COLUMNS;
+-- ANALYZE TABLE ${stats_db_name}.publication_oids COMPUTE STATISTICS;
+-- ANALYZE TABLE ${stats_db_name}.publication_oids COMPUTE STATISTICS FOR COLUMNS;
+-- ANALYZE TABLE ${stats_db_name}.publication_pids COMPUTE STATISTICS;
+-- ANALYZE TABLE ${stats_db_name}.publication_pids COMPUTE STATISTICS FOR COLUMNS;
+-- ANALYZE TABLE ${stats_db_name}.publication_topics COMPUTE STATISTICS;
+-- ANALYZE TABLE ${stats_db_name}.publication_topics COMPUTE STATISTICS FOR COLUMNS;
+-- ANALYZE TABLE ${stats_db_name}.publication_citations COMPUTE STATISTICS;
+-- ANALYZE TABLE ${stats_db_name}.publication_citations COMPUTE STATISTICS FOR COLUMNS;
\ No newline at end of file
diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql
new file mode 100644
index 000000000..9477ada12
--- /dev/null
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql
@@ -0,0 +1,121 @@
+drop database if exists TARGET cascade;
+create database if not exists TARGET;
+-- Lookup tables are exposed in the freshly recreated TARGET as views over SOURCE. NOTE(review): rndexpenditure reads SOURCE.rndexpediture (spelling differs) - confirm.
+create view if not exists TARGET.category as select * from SOURCE.category;
+create view if not exists TARGET.concept as select * from SOURCE.concept;
+create view if not exists TARGET.context as select * from SOURCE.context;
+create view if not exists TARGET.country as select * from SOURCE.country;
+create view if not exists TARGET.countrygdp as select * from SOURCE.countrygdp;
+create view if not exists TARGET.creation_date as select * from SOURCE.creation_date;
+create view if not exists TARGET.funder as select * from SOURCE.funder;
+create view if not exists TARGET.fundref as select * from SOURCE.fundref;
+create view if not exists TARGET.rndexpenditure as select * from SOURCE.rndexpediture;
+-- Disabled: create view if not exists TARGET.roarmap as select * from SOURCE.roarmap;
+-- Materialise the SOURCE results that have a result_projects row joined to a project, or a result_concepts row. The outer distinct removes rows matched by both branches.
+create table TARGET.result as
+ select distinct * from (
+ select * from SOURCE.result r where exists (select 1 from SOURCE.result_projects rp join SOURCE.project p on rp.project=p.id where rp.id=r.id)
+ union all
+ select * from SOURCE.result r where exists (select 1 from SOURCE.result_concepts rc where rc.id=r.id) ) foo;
+compute stats TARGET.result;
+-- Per-result detail tables: each one copies the SOURCE rows whose id exists in TARGET.result and then computes stats on the copy.
+create table TARGET.result_affiliated_country as select * from SOURCE.result_affiliated_country rac where exists (select 1 from TARGET.result r where r.id=rac.id);
+compute stats TARGET.result_affiliated_country;
+
+create table TARGET.result_citations as select * from SOURCE.result_citations orig where exists (select 1 from TARGET.result r where r.id=orig.id);
+compute stats TARGET.result_citations;
+
+create table TARGET.result_classifications as select * from SOURCE.result_classifications orig where exists (select 1 from TARGET.result r where r.id=orig.id);
+compute stats TARGET.result_classifications;
+
+create table TARGET.result_concepts as select * from SOURCE.result_concepts orig where exists (select 1 from TARGET.result r where r.id=orig.id);
+compute stats TARGET.result_concepts;
+
+create table TARGET.result_datasources as select * from SOURCE.result_datasources orig where exists (select 1 from TARGET.result r where r.id=orig.id);
+compute stats TARGET.result_datasources;
+
+create table TARGET.result_deposited_country as select * from SOURCE.result_deposited_country orig where exists (select 1 from TARGET.result r where r.id=orig.id);
+compute stats TARGET.result_deposited_country;
+
+create table TARGET.result_fundercount as select * from SOURCE.result_fundercount orig where exists (select 1 from TARGET.result r where r.id=orig.id);
+compute stats TARGET.result_fundercount;
+
+create table TARGET.result_gold as select * from SOURCE.result_gold orig where exists (select 1 from TARGET.result r where r.id=orig.id);
+compute stats TARGET.result_gold;
+
+create table TARGET.result_greenoa as select * from SOURCE.result_greenoa orig where exists (select 1 from TARGET.result r where r.id=orig.id);
+compute stats TARGET.result_greenoa;
+
+create table TARGET.result_languages as select * from SOURCE.result_languages orig where exists (select 1 from TARGET.result r where r.id=orig.id);
+compute stats TARGET.result_languages;
+-- NOTE(review): TARGET.result_licences is filled from SOURCE.result_licenses (spelling differs) - confirm this is intended.
+create table TARGET.result_licences as select * from SOURCE.result_licenses orig where exists (select 1 from TARGET.result r where r.id=orig.id);
+compute stats TARGET.result_licences;
+
+create table TARGET.result_oids as select * from SOURCE.result_oids orig where exists (select 1 from TARGET.result r where r.id=orig.id);
+compute stats TARGET.result_oids;
+
+create table TARGET.result_organization as select * from SOURCE.result_organization orig where exists (select 1 from TARGET.result r where r.id=orig.id);
+compute stats TARGET.result_organization;
+
+create table TARGET.result_peerreviewed as select * from SOURCE.result_peerreviewed orig where exists (select 1 from TARGET.result r where r.id=orig.id);
+compute stats TARGET.result_peerreviewed;
+
+create table TARGET.result_pids as select * from SOURCE.result_pids orig where exists (select 1 from TARGET.result r where r.id=orig.id);
+compute stats TARGET.result_pids;
+
+create table TARGET.result_projectcount as select * from SOURCE.result_projectcount orig where exists (select 1 from TARGET.result r where r.id=orig.id);
+compute stats TARGET.result_projectcount;
+
+create table TARGET.result_projects as select * from SOURCE.result_projects orig where exists (select 1 from TARGET.result r where r.id=orig.id);
+compute stats TARGET.result_projects;
+
+create table TARGET.result_refereed as select * from SOURCE.result_refereed orig where exists (select 1 from TARGET.result r where r.id=orig.id);
+compute stats TARGET.result_refereed;
+
+create table TARGET.result_sources as select * from SOURCE.result_sources orig where exists (select 1 from TARGET.result r where r.id=orig.id);
+compute stats TARGET.result_sources;
+
+create table TARGET.result_topics as select * from SOURCE.result_topics orig where exists (select 1 from TARGET.result r where r.id=orig.id);
+compute stats TARGET.result_topics;
+
+-- datasources: the datasource tables are exposed unfiltered as views over SOURCE
+create view if not exists TARGET.datasource as select * from SOURCE.datasource;
+create view if not exists TARGET.datasource_oids as select * from SOURCE.datasource_oids;
+create view if not exists TARGET.datasource_organizations as select * from SOURCE.datasource_organizations;
+create view if not exists TARGET.datasource_sources as select * from SOURCE.datasource_sources;
+-- datasource_results is TARGET.result_datasources with the two columns swapped: datasource becomes id and the result id becomes result.
+create table TARGET.datasource_results as select id as result, datasource as id from TARGET.result_datasources;
+compute stats TARGET.datasource_results;
+
+-- organizations: the organization tables are exposed unfiltered as views over SOURCE
+create view if not exists TARGET.organization as select * from SOURCE.organization;
+create view if not exists TARGET.organization_datasources as select * from SOURCE.organization_datasources;
+create view if not exists TARGET.organization_pids as select * from SOURCE.organization_pids;
+create view if not exists TARGET.organization_projects as select * from SOURCE.organization_projects;
+create view if not exists TARGET.organization_sources as select * from SOURCE.organization_sources;
+
+-- projects: the project tables are exposed unfiltered as views over SOURCE
+create view if not exists TARGET.project as select * from SOURCE.project;
+create view if not exists TARGET.project_oids as select * from SOURCE.project_oids;
+create view if not exists TARGET.project_organizations as select * from SOURCE.project_organizations;
+create view if not exists TARGET.project_resultcount as select * from SOURCE.project_resultcount;
+-- project_results is TARGET.result_projects with the two columns swapped: project becomes id and the result id becomes result.
+create table TARGET.project_results as select id as result, project as id from TARGET.result_projects;
+compute stats TARGET.project_results;
+
+-- denorm: replace TARGET.result with a version that also carries project and datasource columns on every row
+alter table TARGET.result rename to TARGET.res_tmp;
+-- Inner joins only, so each result yields one row per matching project and datasource. NOTE(review): results with no project or no datasource row drop out - confirm.
+create table TARGET.result_denorm as
+ select distinct r.*, rp.project, p.acronym as pacronym, p.title as ptitle, p.funder as pfunder, p.funding_lvl0 as pfunding_lvl0, rd.datasource, d.name as dname, d.type as dtype
+ from TARGET.res_tmp r
+ join TARGET.result_projects rp on rp.id=r.id
+ join TARGET.result_datasources rd on rd.id=r.id
+ join TARGET.project p on p.id=rp.project
+ join TARGET.datasource d on d.id=rd.datasource;
+compute stats TARGET.result_denorm;
+-- Give the denormalised table the original name and discard the renamed original.
+alter table TARGET.result_denorm rename to TARGET.result;
+drop table TARGET.res_tmp;
+--- done!
\ No newline at end of file
diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step3.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step3.sql
index f69715a31..dcd5ad858 100644
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step3.sql
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step3.sql
@@ -5,32 +5,108 @@
------------------------------------------------------
-- Dataset temporary table supporting updates
-DROP TABLE IF EXISTS ${stats_db_name}.dataset_tmp;
-CREATE TABLE ${stats_db_name}.dataset_tmp (id STRING, title STRING, publisher STRING, journal STRING, date STRING, year STRING, bestlicence STRING, embargo_end_date STRING, delayed BOOLEAN, authors INT, source STRING, abstract BOOLEAN, type STRING ) clustered by (id) into 100 buckets stored AS orc tblproperties('transactional'='true');
+-- Transactional ORC table bucketed on id (100 buckets). It is filled by the INSERT that follows.
+CREATE TABLE ${stats_db_name}.dataset_tmp (
+    id               STRING,
+    title            STRING,
+    publisher        STRING,
+    journal          STRING,
+    date             STRING,
+    year             STRING,
+    bestlicence      STRING,
+    embargo_end_date STRING,
+    delayed          BOOLEAN,
+    authors          INT,
+    source           STRING,
+    abstract         BOOLEAN,
+    type             STRING
+)
+    CLUSTERED BY (id) INTO 100 BUCKETS STORED AS ORC TBLPROPERTIES ('transactional' = 'true');
-INSERT INTO ${stats_db_name}.dataset_tmp SELECT substr(d.id, 4) AS id, d.title[0].value AS title, d.publisher.value AS publisher, cast(null AS string) AS journal,
-d.dateofacceptance.value as date, date_format(d.dateofacceptance.value,'yyyy') AS year, d.bestaccessright.classname AS bestlicence,
-d.embargoenddate.value AS embargo_end_date, false AS delayed, size(d.author) AS authors , concat_ws('\u003B',d.source.value) AS source,
- CASE WHEN SIZE(d.description) > 0 THEN TRUE ELSE FALSE end AS abstract,
-'dataset' AS type
+-- Load one row per dataset record not flagged deletedbyinference. journal is always NULL and delayed always FALSE here.
+INSERT INTO ${stats_db_name}.dataset_tmp
+SELECT SUBSTR(d.id, 4) AS id, d.title[0].value AS title,
+       d.publisher.value AS publisher,
+       CAST(NULL AS STRING) AS journal,
+       d.dateofacceptance.value AS date,
+       DATE_FORMAT(d.dateofacceptance.value, 'yyyy') AS year,
+       d.bestaccessright.classname AS bestlicence,
+       d.embargoenddate.value AS embargo_end_date,
+       FALSE AS delayed,
+       SIZE(d.author) AS authors,
+       CONCAT_WS('\u003B', d.source.value) AS source,
+       CASE WHEN SIZE(d.description) > 0 THEN TRUE ELSE FALSE END AS abstract,
+       'dataset' AS type
FROM ${openaire_db_name}.dataset d
-WHERE d.datainfo.deletedbyinference=FALSE;
+WHERE d.datainfo.deletedbyinference = FALSE;
--- Dataset_citations
-CREATE TABLE ${stats_db_name}.dataset_citations AS SELECT substr(d.id, 4) AS id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS result FROM ${openaire_db_name}.dataset d LATERAL VIEW explode(d.extrainfo) citations AS citation WHERE xpath_string(citation.value, "//citation/id[@type='openaire']/@value") !="" and d.datainfo.deletedbyinference=false;
+-- dataset_citations: one row per exploded extrainfo entry whose openaire citation id is not empty.
+CREATE TABLE ${stats_db_name}.dataset_citations AS
+SELECT SUBSTR(ds.id, 4) AS id, xpath_string(cit.value, "//citation/id[@type='openaire']/@value") AS result
+FROM ${openaire_db_name}.dataset ds LATERAL VIEW EXPLODE(ds.extrainfo) cit_view AS cit
+WHERE ds.datainfo.deletedbyinference = FALSE
+  AND xpath_string(cit.value, "//citation/id[@type='openaire']/@value") <> "";
-CREATE TABLE ${stats_db_name}.dataset_classifications AS SELECT substr(p.id, 4) AS id, instancetype.classname AS type FROM ${openaire_db_name}.dataset p LATERAL VIEW explode(p.instance.instancetype) instances AS instancetype where p.datainfo.deletedbyinference=false;
+-- dataset_classifications: one row per exploded instance.instancetype entry, keeping its classname as type.
+CREATE TABLE ${stats_db_name}.dataset_classifications AS
+SELECT SUBSTR(ds.id, 4) AS id, itype.classname AS type
+FROM ${openaire_db_name}.dataset ds LATERAL VIEW EXPLODE(ds.instance.instancetype) itype_view AS itype
+WHERE ds.datainfo.deletedbyinference = FALSE;
-CREATE TABLE ${stats_db_name}.dataset_concepts AS SELECT substr(p.id, 4) as id, contexts.context.id as concept from ${openaire_db_name}.dataset p LATERAL VIEW explode(p.context) contexts as context where p.datainfo.deletedbyinference=false;
+-- dataset_concepts: one row per exploded context entry, keeping the context id as concept.
+CREATE TABLE ${stats_db_name}.dataset_concepts AS
+SELECT SUBSTR(ds.id, 4) AS id, ctx_view.ctx.id AS concept
+FROM ${openaire_db_name}.dataset ds LATERAL VIEW EXPLODE(ds.context) ctx_view AS ctx
+WHERE ds.datainfo.deletedbyinference = FALSE;
-CREATE TABLE ${stats_db_name}.dataset_datasources AS SELECT p.id, case when d.id IS NULL THEN 'other' ELSE p.datasource END AS datasource FROM (SELECT substr(p.id, 4) as id, substr(instances.instance.hostedby.key, 4) AS datasource
-FROM ${openaire_db_name}.dataset p LATERAL VIEW explode(p.instance) instances AS instance where p.datainfo.deletedbyinference=false) p LEFT OUTER JOIN
-(SELECT substr(d.id, 4) id FROM ${openaire_db_name}.datasource d WHERE d.datainfo.deletedbyinference=false) d ON p.datasource = d.id;
+-- dataset_datasources: hostedby datasource of each exploded instance, replaced by the literal other when no non-deleted datasource has that id.
+CREATE TABLE ${stats_db_name}.dataset_datasources AS
+SELECT hosted.id, CASE WHEN known.id IS NULL THEN 'other' ELSE hosted.datasource END AS datasource
+FROM (
+    SELECT SUBSTR(ds.id, 4) AS id, SUBSTR(inst_view.inst.hostedby.key, 4) AS datasource
+    FROM ${openaire_db_name}.dataset ds LATERAL VIEW EXPLODE(ds.instance) inst_view AS inst
+    WHERE ds.datainfo.deletedbyinference = FALSE) hosted
+    LEFT OUTER JOIN (
+    SELECT SUBSTR(src.id, 4) AS id
+    FROM ${openaire_db_name}.datasource src
+    WHERE src.datainfo.deletedbyinference = FALSE) known ON hosted.datasource = known.id;
-CREATE TABLE ${stats_db_name}.dataset_languages AS SELECT substr(p.id, 4) AS id, p.language.classname AS language FROM ${openaire_db_name}.dataset p where p.datainfo.deletedbyinference=false;
+-- dataset_languages: language classname of each dataset record.
+CREATE TABLE ${stats_db_name}.dataset_languages AS
+SELECT SUBSTR(ds.id, 4) AS id, ds.language.classname AS language
+FROM ${openaire_db_name}.dataset ds WHERE ds.datainfo.deletedbyinference = FALSE;
-CREATE TABLE ${stats_db_name}.dataset_oids AS SELECT substr(p.id, 4) AS id, oids.ids AS oid FROM ${openaire_db_name}.dataset p LATERAL VIEW explode(p.originalid) oids AS ids where p.datainfo.deletedbyinference=false;
+-- dataset_oids: one row per exploded originalid entry.
+CREATE TABLE ${stats_db_name}.dataset_oids AS
+SELECT SUBSTR(ds.id, 4) AS id, oid_view.oid_value AS oid
+FROM ${openaire_db_name}.dataset ds LATERAL VIEW EXPLODE(ds.originalid) oid_view AS oid_value
+WHERE ds.datainfo.deletedbyinference = FALSE;
-CREATE TABLE ${stats_db_name}.dataset_pids AS SELECT substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value AS pid FROM ${openaire_db_name}.dataset p LATERAL VIEW explode(p.pid) pids AS ppid where p.datainfo.deletedbyinference=false;
+-- dataset_pids: one row per exploded pid entry, with its qualifier classname as type.
+CREATE TABLE ${stats_db_name}.dataset_pids AS
+SELECT SUBSTR(ds.id, 4) AS id, pid_item.qualifier.classname AS type, pid_item.value AS pid
+FROM ${openaire_db_name}.dataset ds LATERAL VIEW EXPLODE(ds.pid) pid_view AS pid_item
+WHERE ds.datainfo.deletedbyinference = FALSE;
-CREATE TABLE ${stats_db_name}.dataset_topics AS SELECT substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS type, subjects.subject.value AS topic FROM ${openaire_db_name}.dataset p LATERAL VIEW explode(p.subject) subjects AS subject where p.datainfo.deletedbyinference=false;
+-- dataset_topics: one row per exploded subject entry, with its qualifier classname as type.
+CREATE TABLE ${stats_db_name}.dataset_topics AS
+SELECT SUBSTR(ds.id, 4) AS id, subj_view.subj.qualifier.classname AS type, subj_view.subj.value AS topic
+FROM ${openaire_db_name}.dataset ds LATERAL VIEW EXPLODE(ds.subject) subj_view AS subj
+WHERE ds.datainfo.deletedbyinference = FALSE;
+-- Statistics collection is switched off: every ANALYZE statement below is commented out.
+-- ANALYZE TABLE ${stats_db_name}.dataset_tmp COMPUTE STATISTICS;
+-- ANALYZE TABLE ${stats_db_name}.dataset_tmp COMPUTE STATISTICS FOR COLUMNS;
+-- ANALYZE TABLE ${stats_db_name}.dataset_classifications COMPUTE STATISTICS;
+-- ANALYZE TABLE ${stats_db_name}.dataset_classifications COMPUTE STATISTICS FOR COLUMNS;
+-- ANALYZE TABLE ${stats_db_name}.dataset_concepts COMPUTE STATISTICS;
+-- ANALYZE TABLE ${stats_db_name}.dataset_concepts COMPUTE STATISTICS FOR COLUMNS;
+-- ANALYZE TABLE ${stats_db_name}.dataset_datasources COMPUTE STATISTICS;
+-- ANALYZE TABLE ${stats_db_name}.dataset_datasources COMPUTE STATISTICS FOR COLUMNS;
+-- ANALYZE TABLE ${stats_db_name}.dataset_languages COMPUTE STATISTICS;
+-- ANALYZE TABLE ${stats_db_name}.dataset_languages COMPUTE STATISTICS FOR COLUMNS;
+-- ANALYZE TABLE ${stats_db_name}.dataset_oids COMPUTE STATISTICS;
+-- ANALYZE TABLE ${stats_db_name}.dataset_oids COMPUTE STATISTICS FOR COLUMNS;
+-- ANALYZE TABLE ${stats_db_name}.dataset_pids COMPUTE STATISTICS;
+-- ANALYZE TABLE ${stats_db_name}.dataset_pids COMPUTE STATISTICS FOR COLUMNS;
+-- ANALYZE TABLE ${stats_db_name}.dataset_topics COMPUTE STATISTICS;
+-- ANALYZE TABLE ${stats_db_name}.dataset_topics COMPUTE STATISTICS FOR COLUMNS;
\ No newline at end of file
diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step4.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step4.sql
index 2c4a625e1..fd5390e66 100644
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step4.sql
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step4.sql
@@ -5,32 +5,108 @@
--------------------------------------------------------
-- Software temporary table supporting updates
-DROP TABLE IF EXISTS ${stats_db_name}.software_tmp;
-CREATE TABLE ${stats_db_name}.software_tmp (id STRING, title STRING, publisher STRING, journal STRING, date STRING, year STRING, bestlicence STRING, embargo_end_date STRING, delayed BOOLEAN, authors INT, source STRING, abstract BOOLEAN, type STRING ) clustered by (id) INTO 100 buckets stored AS orc tblproperties('transactional'='true');
+-- Transactional ORC table bucketed on id (100 buckets). It is filled by the INSERT that follows.
+CREATE TABLE ${stats_db_name}.software_tmp (
+    id               STRING,
+    title            STRING,
+    publisher        STRING,
+    journal          STRING,
+    date             STRING,
+    year             STRING,
+    bestlicence      STRING,
+    embargo_end_date STRING,
+    delayed          BOOLEAN,
+    authors          INT,
+    source           STRING,
+    abstract         BOOLEAN,
+    type             STRING
+)
+    CLUSTERED BY (id) INTO 100 BUCKETS STORED AS ORC TBLPROPERTIES ('transactional' = 'true');
-INSERT INTO ${stats_db_name}.software_tmp SELECT substr(s.id, 4) as id, s.title[0].value AS title, s.publisher.value AS publisher, CAST(NULL AS string) AS journal,
-s.dateofacceptance.value AS DATE, date_format(s.dateofacceptance.value,'yyyy') AS YEAR, s.bestaccessright.classname AS bestlicence,
-s.embargoenddate.value AS embargo_end_date, FALSE AS delayed, SIZE(s.author) AS authors , concat_ws('\u003B',s.source.value) AS source,
- CASE WHEN SIZE(s.description) > 0 THEN TRUE ELSE FALSE END AS abstract,
-'software' as type
+-- Load one row per software record not flagged deletedbyinference. journal is always NULL and delayed always FALSE here.
+INSERT INTO ${stats_db_name}.software_tmp
+SELECT SUBSTR(s.id, 4) AS id, s.title[0].value AS title,
+       s.publisher.value AS publisher,
+       CAST(NULL AS STRING) AS journal,
+       s.dateofacceptance.value AS date,
+       DATE_FORMAT(s.dateofacceptance.value, 'yyyy') AS year,
+       s.bestaccessright.classname AS bestlicence,
+       s.embargoenddate.value AS embargo_end_date,
+       FALSE AS delayed,
+       SIZE(s.author) AS authors,
+       CONCAT_WS('\u003B', s.source.value) AS source,
+       CASE WHEN SIZE(s.description) > 0 THEN TRUE ELSE FALSE END AS abstract,
+       'software' AS type
from ${openaire_db_name}.software s
-where s.datainfo.deletedbyinference=false;
+WHERE s.datainfo.deletedbyinference = FALSE;
--- Software_citations
-CREATE TABLE ${stats_db_name}.software_citations AS SELECT substr(s.id, 4) as id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS RESULT FROM ${openaire_db_name}.software s LATERAL VIEW explode(s.extrainfo) citations as citation where xpath_string(citation.value, "//citation/id[@type='openaire']/@value") !="" and s.datainfo.deletedbyinference=false;
+-- software_citations: one row per exploded extrainfo entry whose openaire citation id is not empty.
+CREATE TABLE ${stats_db_name}.software_citations AS
+SELECT SUBSTR(sw.id, 4) AS id, xpath_string(cit.value, "//citation/id[@type='openaire']/@value") AS result
+FROM ${openaire_db_name}.software sw LATERAL VIEW EXPLODE(sw.extrainfo) cit_view AS cit
+WHERE sw.datainfo.deletedbyinference = FALSE
+  AND xpath_string(cit.value, "//citation/id[@type='openaire']/@value") <> "";
-CREATE TABLE ${stats_db_name}.software_classifications AS SELECT substr(p.id, 4) AS id, instancetype.classname AS type FROM ${openaire_db_name}.software p LATERAL VIEW explode(p.instance.instancetype) instances AS instancetype where p.datainfo.deletedbyinference=false;
+-- software_classifications: one row per exploded instance.instancetype entry, keeping its classname as type.
+CREATE TABLE ${stats_db_name}.software_classifications AS
+SELECT SUBSTR(sw.id, 4) AS id, itype.classname AS type
+FROM ${openaire_db_name}.software sw LATERAL VIEW EXPLODE(sw.instance.instancetype) itype_view AS itype
+WHERE sw.datainfo.deletedbyinference = FALSE;
-CREATE TABLE ${stats_db_name}.software_concepts AS SELECT substr(p.id, 4) AS id, contexts.context.id AS concept FROM ${openaire_db_name}.software p LATERAL VIEW explode(p.context) contexts AS context where p.datainfo.deletedbyinference=false;
+-- software_concepts: one row per exploded context entry, keeping the context id as concept.
+CREATE TABLE ${stats_db_name}.software_concepts AS
+SELECT SUBSTR(sw.id, 4) AS id, ctx_view.ctx.id AS concept
+FROM ${openaire_db_name}.software sw LATERAL VIEW EXPLODE(sw.context) ctx_view AS ctx
+WHERE sw.datainfo.deletedbyinference = FALSE;
-CREATE TABLE ${stats_db_name}.software_datasources AS SELECT p.id, CASE WHEN d.id IS NULL THEN 'other' ELSE p.datasource end as datasource FROM (SELECT substr(p.id, 4) AS id, substr(instances.instance.hostedby.key, 4) AS datasource
-FROM ${openaire_db_name}.software p LATERAL VIEW explode(p.instance) instances AS instance where p.datainfo.deletedbyinference=false) p LEFT OUTER JOIN
-(SELECT substr(d.id, 4) id FROM ${openaire_db_name}.datasource d WHERE d.datainfo.deletedbyinference=false) d ON p.datasource = d.id;
+-- software_datasources: hostedby datasource of each exploded instance, replaced by the literal other when no non-deleted datasource has that id.
+CREATE TABLE ${stats_db_name}.software_datasources AS
+SELECT hosted.id, CASE WHEN known.id IS NULL THEN 'other' ELSE hosted.datasource END AS datasource
+FROM (
+    SELECT SUBSTR(sw.id, 4) AS id, SUBSTR(inst_view.inst.hostedby.key, 4) AS datasource
+    FROM ${openaire_db_name}.software sw LATERAL VIEW EXPLODE(sw.instance) inst_view AS inst
+    WHERE sw.datainfo.deletedbyinference = FALSE) hosted
+    LEFT OUTER JOIN (
+    SELECT SUBSTR(src.id, 4) AS id
+    FROM ${openaire_db_name}.datasource src
+    WHERE src.datainfo.deletedbyinference = FALSE) known ON hosted.datasource = known.id;
-CREATE TABLE ${stats_db_name}.software_languages AS select substr(p.id, 4) AS id, p.language.classname AS language FROM ${openaire_db_name}.software p where p.datainfo.deletedbyinference=false;
+-- software_languages: language classname of each software record.
+CREATE TABLE ${stats_db_name}.software_languages AS
+SELECT SUBSTR(sw.id, 4) AS id, sw.language.classname AS language
+FROM ${openaire_db_name}.software sw WHERE sw.datainfo.deletedbyinference = FALSE;
-CREATE TABLE ${stats_db_name}.software_oids AS SELECT substr(p.id, 4) AS id, oids.ids AS oid FROM ${openaire_db_name}.software p LATERAL VIEW explode(p.originalid) oids AS ids where p.datainfo.deletedbyinference=false;
+-- software_oids: one row per exploded originalid entry.
+CREATE TABLE ${stats_db_name}.software_oids AS
+SELECT SUBSTR(sw.id, 4) AS id, oid_view.oid_value AS oid
+FROM ${openaire_db_name}.software sw LATERAL VIEW EXPLODE(sw.originalid) oid_view AS oid_value
+WHERE sw.datainfo.deletedbyinference = FALSE;
-CREATE TABLE ${stats_db_name}.software_pids AS SELECT substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value AS pid FROM ${openaire_db_name}.software p LATERAL VIEW explode(p.pid) pids AS ppid where p.datainfo.deletedbyinference=false;
+-- software_pids: one row per exploded pid entry, with its qualifier classname as type.
+CREATE TABLE ${stats_db_name}.software_pids AS
+SELECT SUBSTR(sw.id, 4) AS id, pid_item.qualifier.classname AS type, pid_item.value AS pid
+FROM ${openaire_db_name}.software sw LATERAL VIEW EXPLODE(sw.pid) pid_view AS pid_item
+WHERE sw.datainfo.deletedbyinference = FALSE;
-CREATE TABLE ${stats_db_name}.software_topics AS SELECT substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS type, subjects.subject.value AS topic FROM ${openaire_db_name}.software p LATERAL VIEW explode(p.subject) subjects AS subject where p.datainfo.deletedbyinference=false;
+-- software_topics: one row per exploded subject entry, with its qualifier classname as type.
+CREATE TABLE ${stats_db_name}.software_topics AS
+SELECT SUBSTR(sw.id, 4) AS id, subj_view.subj.qualifier.classname AS type, subj_view.subj.value AS topic
+FROM ${openaire_db_name}.software sw LATERAL VIEW EXPLODE(sw.subject) subj_view AS subj
+WHERE sw.datainfo.deletedbyinference = FALSE;
+-- Statistics collection is switched off: every ANALYZE statement below is commented out.
+-- ANALYZE TABLE ${stats_db_name}.software_tmp COMPUTE STATISTICS;
+-- ANALYZE TABLE ${stats_db_name}.software_tmp COMPUTE STATISTICS FOR COLUMNS;
+-- ANALYZE TABLE ${stats_db_name}.software_classifications COMPUTE STATISTICS;
+-- ANALYZE TABLE ${stats_db_name}.software_classifications COMPUTE STATISTICS FOR COLUMNS;
+-- ANALYZE TABLE ${stats_db_name}.software_concepts COMPUTE STATISTICS;
+-- ANALYZE TABLE ${stats_db_name}.software_concepts COMPUTE STATISTICS FOR COLUMNS;
+-- ANALYZE TABLE ${stats_db_name}.software_datasources COMPUTE STATISTICS;
+-- ANALYZE TABLE ${stats_db_name}.software_datasources COMPUTE STATISTICS FOR COLUMNS;
+-- ANALYZE TABLE ${stats_db_name}.software_languages COMPUTE STATISTICS;
+-- ANALYZE TABLE ${stats_db_name}.software_languages COMPUTE STATISTICS FOR COLUMNS;
+-- ANALYZE TABLE ${stats_db_name}.software_oids COMPUTE STATISTICS;
+-- ANALYZE TABLE ${stats_db_name}.software_oids COMPUTE STATISTICS FOR COLUMNS;
+-- ANALYZE TABLE ${stats_db_name}.software_pids COMPUTE STATISTICS;
+-- ANALYZE TABLE ${stats_db_name}.software_pids COMPUTE STATISTICS FOR COLUMNS;
+-- ANALYZE TABLE ${stats_db_name}.software_topics COMPUTE STATISTICS;
+-- ANALYZE TABLE ${stats_db_name}.software_topics COMPUTE STATISTICS FOR COLUMNS;
\ No newline at end of file
diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step5.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step5.sql
index 1fa5df8cb..b359b596f 100644
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step5.sql
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step5.sql
@@ -5,32 +5,99 @@
--------------------------------------------------------------------------------
-- Otherresearchproduct temporary table supporting updates
-DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_tmp;
-CREATE TABLE ${stats_db_name}.otherresearchproduct_tmp ( id STRING, title STRING, publisher STRING, journal STRING, date STRING, year STRING, bestlicence STRING, embargo_end_date STRING, delayed BOOLEAN, authors INT, source STRING, abstract BOOLEAN, type STRING ) CLUSTERED BY (id) INTO 100 buckets stored AS orc tblproperties('transactional'='true');
+-- Transactional ORC table bucketed on id (100 buckets). It is filled by the INSERT that follows.
+CREATE TABLE ${stats_db_name}.otherresearchproduct_tmp (
+    id               STRING,
+    title            STRING,
+    publisher        STRING,
+    journal          STRING,
+    date             STRING,
+    year             STRING,
+    bestlicence      STRING,
+    embargo_end_date STRING,
+    delayed          BOOLEAN,
+    authors          INT,
+    source           STRING,
+    abstract         BOOLEAN,
+    type             STRING
+) CLUSTERED BY (id) INTO 100 BUCKETS STORED AS ORC TBLPROPERTIES ('transactional' = 'true');
-INSERT INTO ${stats_db_name}.otherresearchproduct_tmp SELECT substr(o.id, 4) AS id, o.title[0].value AS title, o.publisher.value AS publisher, CAST(NULL AS string) AS journal,
-o.dateofacceptance.value AS DATE, date_format(o.dateofacceptance.value,'yyyy') AS year, o.bestaccessright.classname AS bestlicence,
-o.embargoenddate.value as embargo_end_date, FALSE AS delayed, SIZE(o.author) AS authors , concat_ws('\u003B',o.source.value) AS source,
-CASE WHEN SIZE(o.description) > 0 THEN TRUE ELSE FALSE END AS abstract,
-'other' AS type
+-- Load one row per otherresearchproduct record not flagged deletedbyinference. journal is always NULL and delayed always FALSE here.
+INSERT INTO ${stats_db_name}.otherresearchproduct_tmp
+SELECT SUBSTR(o.id, 4) AS id, o.title[0].value AS title,
+       o.publisher.value AS publisher,
+       CAST(NULL AS STRING) AS journal,
+       o.dateofacceptance.value AS date,
+       DATE_FORMAT(o.dateofacceptance.value, 'yyyy') AS year,
+       o.bestaccessright.classname AS bestlicence,
+       o.embargoenddate.value AS embargo_end_date,
+       FALSE AS delayed,
+       SIZE(o.author) AS authors,
+       CONCAT_WS('\u003B', o.source.value) AS source,
+       CASE WHEN SIZE(o.description) > 0 THEN TRUE ELSE FALSE END AS abstract,
+       'other' AS type
FROM ${openaire_db_name}.otherresearchproduct o
-WHERE o.datainfo.deletedbyinference=FALSE;
+WHERE o.datainfo.deletedbyinference = FALSE;
-- Otherresearchproduct_citations
-CREATE TABLE ${stats_db_name}.otherresearchproduct_citations AS SELECT substr(o.id, 4) AS id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS RESULT FROM ${openaire_db_name}.otherresearchproduct o LATERAL VIEW explode(o.extrainfo) citations AS citation WHERE xpath_string(citation.value, "//citation/id[@type='openaire']/@value") !="" and o.datainfo.deletedbyinference=false;
+-- otherresearchproduct_citations: one row per exploded extrainfo entry whose openaire citation id is not empty.
+CREATE TABLE ${stats_db_name}.otherresearchproduct_citations AS
+SELECT SUBSTR(orp.id, 4) AS id, xpath_string(cit.value, "//citation/id[@type='openaire']/@value") AS result
+FROM ${openaire_db_name}.otherresearchproduct orp LATERAL VIEW EXPLODE(orp.extrainfo) cit_view AS cit
+WHERE orp.datainfo.deletedbyinference = FALSE AND xpath_string(cit.value, "//citation/id[@type='openaire']/@value") <> "";
-CREATE TABLE ${stats_db_name}.otherresearchproduct_classifications AS SELECT substr(p.id, 4) AS id, instancetype.classname AS type FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.instance.instancetype) instances AS instancetype where p.datainfo.deletedbyinference=false;
+-- otherresearchproduct_classifications: one row per exploded instance.instancetype entry, keeping its classname as type.
+CREATE TABLE ${stats_db_name}.otherresearchproduct_classifications AS SELECT SUBSTR(orp.id, 4) AS id, itype.classname AS type
+FROM ${openaire_db_name}.otherresearchproduct orp LATERAL VIEW EXPLODE(orp.instance.instancetype) itype_view AS itype
+WHERE orp.datainfo.deletedbyinference = FALSE;
-CREATE TABLE ${stats_db_name}.otherresearchproduct_concepts AS SELECT substr(p.id, 4) AS id, contexts.context.id AS concept FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.context) contexts AS context where p.datainfo.deletedbyinference=false;
+-- otherresearchproduct_concepts: one row per exploded context entry, keeping the context id as concept.
+CREATE TABLE ${stats_db_name}.otherresearchproduct_concepts AS SELECT SUBSTR(orp.id, 4) AS id, ctx_view.ctx.id AS concept
+FROM ${openaire_db_name}.otherresearchproduct orp LATERAL VIEW EXPLODE(orp.context) ctx_view AS ctx
+WHERE orp.datainfo.deletedbyinference = FALSE;
-CREATE TABLE ${stats_db_name}.otherresearchproduct_datasources AS SELECT p.id, CASE WHEN d.id IS NULL THEN 'other' ELSE p.datasource END AS datasource FROM (SELECT substr(p.id, 4) AS id, substr(instances.instance.hostedby.key, 4) AS datasource
-from ${openaire_db_name}.otherresearchproduct p lateral view explode(p.instance) instances as instance where p.datainfo.deletedbyinference=false) p LEFT OUTER JOIN
-(SELECT substr(d.id, 4) id from ${openaire_db_name}.datasource d WHERE d.datainfo.deletedbyinference=false) d on p.datasource = d.id;
+-- otherresearchproduct_datasources: hostedby datasource of each exploded instance, replaced by the literal other when no non-deleted datasource has that id.
+CREATE TABLE ${stats_db_name}.otherresearchproduct_datasources AS
+SELECT hosted.id, CASE WHEN known.id IS NULL THEN 'other' ELSE hosted.datasource END AS datasource
+FROM (SELECT SUBSTR(orp.id, 4) AS id, SUBSTR(inst_view.inst.hostedby.key, 4) AS datasource
+      FROM ${openaire_db_name}.otherresearchproduct orp LATERAL VIEW EXPLODE(orp.instance) inst_view AS inst
+      WHERE orp.datainfo.deletedbyinference = FALSE) hosted
+     LEFT OUTER JOIN (SELECT SUBSTR(src.id, 4) AS id FROM ${openaire_db_name}.datasource src
+                      WHERE src.datainfo.deletedbyinference = FALSE) known ON hosted.datasource = known.id;
-CREATE TABLE ${stats_db_name}.otherresearchproduct_languages AS SELECT substr(p.id, 4) AS id, p.language.classname AS language FROM ${openaire_db_name}.otherresearchproduct p where p.datainfo.deletedbyinference=false;
+-- otherresearchproduct_languages: language classname of each otherresearchproduct record.
+CREATE TABLE ${stats_db_name}.otherresearchproduct_languages AS
+SELECT SUBSTR(orp.id, 4) AS id, orp.language.classname AS language
+FROM ${openaire_db_name}.otherresearchproduct orp WHERE orp.datainfo.deletedbyinference = FALSE;
-CREATE TABLE ${stats_db_name}.otherresearchproduct_oids AS SELECT substr(p.id, 4) AS id, oids.ids AS oid FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.originalid) oids AS ids where p.datainfo.deletedbyinference=false;
+-- otherresearchproduct_oids: one row per exploded originalid entry.
+CREATE TABLE ${stats_db_name}.otherresearchproduct_oids AS SELECT SUBSTR(orp.id, 4) AS id, oid_view.oid_value AS oid
+FROM ${openaire_db_name}.otherresearchproduct orp LATERAL VIEW EXPLODE(orp.originalid) oid_view AS oid_value
+WHERE orp.datainfo.deletedbyinference = FALSE;
-CREATE TABLE ${stats_db_name}.otherresearchproduct_pids AS SELECT substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value AS pid FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.pid) pids AS ppid where p.datainfo.deletedbyinference=false;
+-- otherresearchproduct_pids: one row per exploded pid entry, with its qualifier classname as type.
+CREATE TABLE ${stats_db_name}.otherresearchproduct_pids AS SELECT SUBSTR(orp.id, 4) AS id, pid_item.qualifier.classname AS type, pid_item.value AS pid
+FROM ${openaire_db_name}.otherresearchproduct orp LATERAL VIEW EXPLODE(orp.pid) pid_view AS pid_item
+WHERE orp.datainfo.deletedbyinference = FALSE;
-CREATE TABLE ${stats_db_name}.otherresearchproduct_topics AS SELECT substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS type, subjects.subject.value AS topic FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.subject) subjects AS subject where p.datainfo.deletedbyinference=false;
+-- otherresearchproduct_topics: one row per exploded subject entry, with its qualifier classname as type.
+CREATE TABLE ${stats_db_name}.otherresearchproduct_topics AS SELECT SUBSTR(orp.id, 4) AS id, subj_view.subj.qualifier.classname AS type, subj_view.subj.value AS topic
+FROM ${openaire_db_name}.otherresearchproduct orp LATERAL VIEW EXPLODE(orp.subject) subj_view AS subj
+WHERE orp.datainfo.deletedbyinference = FALSE;
+
+-- ANALYZE TABLE ${stats_db_name}.otherresearchproduct_tmp COMPUTE STATISTICS;
+-- ANALYZE TABLE ${stats_db_name}.otherresearchproduct_tmp COMPUTE STATISTICS FOR COLUMNS;
+-- ANALYZE TABLE ${stats_db_name}.otherresearchproduct_classifications COMPUTE STATISTICS;
+-- ANALYZE TABLE ${stats_db_name}.otherresearchproduct_classifications COMPUTE STATISTICS FOR COLUMNS;
+-- ANALYZE TABLE ${stats_db_name}.otherresearchproduct_concepts COMPUTE STATISTICS;
+-- ANALYZE TABLE ${stats_db_name}.otherresearchproduct_concepts COMPUTE STATISTICS FOR COLUMNS;
+-- ANALYZE TABLE ${stats_db_name}.otherresearchproduct_datasources COMPUTE STATISTICS;
+-- ANALYZE TABLE ${stats_db_name}.otherresearchproduct_datasources COMPUTE STATISTICS FOR COLUMNS;
+-- ANALYZE TABLE ${stats_db_name}.otherresearchproduct_languages COMPUTE STATISTICS;
+-- ANALYZE TABLE ${stats_db_name}.otherresearchproduct_languages COMPUTE STATISTICS FOR COLUMNS;
+-- ANALYZE TABLE ${stats_db_name}.otherresearchproduct_oids COMPUTE STATISTICS;
+-- ANALYZE TABLE ${stats_db_name}.otherresearchproduct_oids COMPUTE STATISTICS FOR COLUMNS;
+-- ANALYZE TABLE ${stats_db_name}.otherresearchproduct_pids COMPUTE STATISTICS;
+-- ANALYZE TABLE ${stats_db_name}.otherresearchproduct_pids COMPUTE STATISTICS FOR COLUMNS;
+-- ANALYZE TABLE ${stats_db_name}.otherresearchproduct_topics COMPUTE STATISTICS;
+-- ANALYZE TABLE ${stats_db_name}.otherresearchproduct_topics COMPUTE STATISTICS FOR COLUMNS;
\ No newline at end of file
diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step6.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step6.sql
index b4745535d..23ef03bc9 100644
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step6.sql
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step6.sql
@@ -1,30 +1,84 @@
--- noinspection SqlNoDataSourceInspectionForFile
-
------------------------------------------------------
------------------------------------------------------
-- Project table/view and Project related tables/views
------------------------------------------------------
------------------------------------------------------
--- Project_oids Table
-DROP TABLE IF EXISTS ${stats_db_name}.project_oids;
-CREATE TABLE ${stats_db_name}.project_oids AS SELECT substr(p.id, 4) AS id, oids.ids AS oid FROM ${openaire_db_name}.project p LATERAL VIEW explode(p.originalid) oids AS ids;
+-- One row per (project, original id); substr(p.id, 4) drops the first three characters of the id.
+CREATE TABLE ${stats_db_name}.project_oids AS
+SELECT substr(p.id, 4) AS id,
+       oids.ids        AS oid
+FROM ${openaire_db_name}.project p
+         LATERAL VIEW explode(p.originalid) oids AS ids;
+-- Project/organization pairs read from 'projectOrganization' relations not deleted by inference.
+CREATE TABLE ${stats_db_name}.project_organizations AS
+SELECT substr(r.source, 4) AS id,
+       substr(r.target, 4) AS organization
+FROM ${openaire_db_name}.relation r
+WHERE r.datainfo.deletedbyinference = FALSE
+  AND r.reltype = 'projectOrganization';
--- Project_organizations Table
-DROP TABLE IF EXISTS ${stats_db_name}.project_organizations;
-CREATE TABLE ${stats_db_name}.project_organizations AS SELECT substr(r.source, 4) AS id, substr(r.target, 4) AS organization from ${openaire_db_name}.relation r WHERE r.reltype='projectOrganization' and r.datainfo.deletedbyinference=false;
+-- Project/result pairs read from 'resultProject' relations not deleted by inference.
+-- The relation target becomes the project id and the relation source becomes the result.
+CREATE TABLE ${stats_db_name}.project_results AS
+SELECT substr(r.target, 4) AS id,
+       substr(r.source, 4) AS result
+FROM ${openaire_db_name}.relation r
+WHERE r.datainfo.deletedbyinference = FALSE
+  AND r.reltype = 'resultProject';
--- Project_results Table
-DROP TABLE IF EXISTS ${stats_db_name}.project_results;
-CREATE TABLE ${stats_db_name}.project_results AS SELECT substr(r.target, 4) AS id, substr(r.source, 4) AS result FROM ${openaire_db_name}.relation r WHERE r.reltype='resultProject' and r.datainfo.deletedbyinference=false;
+-- Temporary project table: bucketed on id into 100 buckets and stored as transactional ORC.
+CREATE TABLE ${stats_db_name}.project_tmp (
+    id             STRING,
+    acronym        STRING,
+    title          STRING,
+    funder         STRING,
+    funding_lvl0   STRING,
+    funding_lvl1   STRING,
+    funding_lvl2   STRING,
+    ec39           STRING,
+    type           STRING,
+    startdate      STRING,
+    enddate        STRING,
+    start_year     INT,
+    end_year       INT,
+    duration       INT,
+    haspubs        STRING,
+    numpubs        INT,
+    daysforlastpub INT,
+    delayedpubs    INT,
+    callidentifier STRING,
+    code           STRING
+)
+    CLUSTERED BY (id) INTO 100 BUCKETS
+    STORED AS ORC
+    TBLPROPERTIES ('transactional' = 'true');
--- Project table
-----------------
--- Creating and populating temporary Project table
-DROP TABLE IF EXISTS ${stats_db_name}.project_tmp;
-CREATE TABLE ${stats_db_name}.project_tmp (id STRING, acronym STRING, title STRING, funder STRING, funding_lvl0 STRING, funding_lvl1 STRING, funding_lvl2 STRING, ec39 STRING, type STRING, startdate STRING, enddate STRING, start_year INT, end_year INT, duration INT, haspubs STRING, numpubs INT, daysforlastpub INT, delayedpubs INT, callidentifier STRING, code STRING) CLUSTERED BY (id) INTO 100 buckets stored AS orc tblproperties('transactional'='true');
-
-INSERT INTO ${stats_db_name}.project_tmp SELECT substr(p.id, 4) AS id, p.acronym.value AS acronym, p.title.value AS title, xpath_string(p.fundingtree[0].value, '//funder/name') AS funder, xpath_string(p.fundingtree[0].value, '//funding_level_0/name') AS funding_lvl0, xpath_string(p.fundingtree[0].value, '//funding_level_1/name') AS funding_lvl1, xpath_string(p.fundingtree[0].value, '//funding_level_2/name') AS funding_lvl2, p.ecsc39.value AS ec39, p.contracttype.classname AS type, p.startdate.value AS startdate, p.enddate.value AS enddate, year(p.startdate.value) AS start_year, year(p.enddate.value) AS end_year, CAST(MONTHS_BETWEEN(p.enddate.value, p.startdate.value) AS INT) AS duration, 'no' AS haspubs, 0 AS numpubs, 0 AS daysforlastpub, 0 AS delayedpubs, p.callidentifier.value AS callidentifier, p.code.value AS code FROM ${openaire_db_name}.project p WHERE p.datainfo.deletedbyinference=false;
+-- Fill project_tmp from projects not deleted by inference. Funder and funding levels are read
+-- with XPath from the first fundingtree entry; duration is the whole number of months between
+-- start and end date; the publication-related columns start as 'no' / 0.
+INSERT INTO ${stats_db_name}.project_tmp
+SELECT substr(p.id, 4)                                                          AS id,
+       p.acronym.value                                                          AS acronym,
+       p.title.value                                                            AS title,
+       xpath_string(p.fundingtree[0].value, '//funder/name')                    AS funder,
+       xpath_string(p.fundingtree[0].value, '//funding_level_0/name')           AS funding_lvl0,
+       xpath_string(p.fundingtree[0].value, '//funding_level_1/name')           AS funding_lvl1,
+       xpath_string(p.fundingtree[0].value, '//funding_level_2/name')           AS funding_lvl2,
+       p.ecsc39.value                                                           AS ec39,
+       p.contracttype.classname                                                 AS type,
+       p.startdate.value                                                        AS startdate,
+       p.enddate.value                                                          AS enddate,
+       year(p.startdate.value)                                                  AS start_year,
+       year(p.enddate.value)                                                    AS end_year,
+       CAST(MONTHS_BETWEEN(p.enddate.value, p.startdate.value) AS INT)          AS duration,
+       'no'                                                                     AS haspubs,
+       0                                                                        AS numpubs,
+       0                                                                        AS daysforlastpub,
+       0                                                                        AS delayedpubs,
+       p.callidentifier.value                                                   AS callidentifier,
+       p.code.value                                                             AS code
+FROM ${openaire_db_name}.project p
+WHERE p.datainfo.deletedbyinference = FALSE;
+-- Distinct funders (id, name, shortname) extracted with XPath from every fundingtree entry of every project.
create table ${stats_db_name}.funder as
-select distinct xpath_string(fund, '//funder/id') as id, xpath_string(fund, '//funder/name') as name, xpath_string(fund, '//funder/shortname') as shortname
-from ${openaire_db_name}.project p lateral view explode(p.fundingtree.value) fundingtree as fund
+SELECT DISTINCT xpath_string(fund, '//funder/id')        AS id,
+                xpath_string(fund, '//funder/name')      AS name,
+                xpath_string(fund, '//funder/shortname') AS shortname
+FROM ${openaire_db_name}.project p
+         LATERAL VIEW explode(p.fundingtree.value) fundingtree AS fund;
+
+-- ANALYZE TABLE ${stats_db_name}.project_oids COMPUTE STATISTICS;
+-- ANALYZE TABLE ${stats_db_name}.project_oids COMPUTE STATISTICS FOR COLUMNS;
+-- ANALYZE TABLE ${stats_db_name}.project_organizations COMPUTE STATISTICS;
+-- ANALYZE TABLE ${stats_db_name}.project_organizations COMPUTE STATISTICS FOR COLUMNS;
+-- ANALYZE TABLE ${stats_db_name}.project_results COMPUTE STATISTICS;
+-- ANALYZE TABLE ${stats_db_name}.project_results COMPUTE STATISTICS FOR COLUMNS;
+-- ANALYZE TABLE ${stats_db_name}.project_tmp COMPUTE STATISTICS;
+-- ANALYZE TABLE ${stats_db_name}.project_tmp COMPUTE STATISTICS FOR COLUMNS;
+-- ANALYZE TABLE ${stats_db_name}.funder COMPUTE STATISTICS;
+-- ANALYZE TABLE ${stats_db_name}.funder COMPUTE STATISTICS FOR COLUMNS;
\ No newline at end of file
diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step7.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step7.sql
index 36a4a8a49..ae540b9b2 100644
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step7.sql
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step7.sql
@@ -5,27 +5,137 @@
----------------------------------------------------
-- Views on temporary tables that should be re-created in the end
-CREATE OR REPLACE VIEW ${stats_db_name}.result as SELECT *, bestlicence AS access_mode FROM ${stats_db_name}.publication_tmp UNION ALL SELECT *,bestlicence AS access_mode FROM ${stats_db_name}.software_tmp UNION ALL SELECT *,bestlicence AS access_mode FROM ${stats_db_name}.dataset_tmp UNION ALL SELECT *,bestlicence AS access_mode FROM ${stats_db_name}.otherresearchproduct_tmp;
+-- All four result types in one view; access_mode repeats each row's bestlicence value.
+CREATE OR REPLACE VIEW ${stats_db_name}.result AS
+SELECT *, bestlicence AS access_mode FROM ${stats_db_name}.publication_tmp
+UNION ALL
+SELECT *, bestlicence AS access_mode FROM ${stats_db_name}.software_tmp
+UNION ALL
+SELECT *, bestlicence AS access_mode FROM ${stats_db_name}.dataset_tmp
+UNION ALL
+SELECT *, bestlicence AS access_mode FROM ${stats_db_name}.otherresearchproduct_tmp;
-- Views on final tables
-CREATE OR REPLACE VIEW ${stats_db_name}.result_datasources AS SELECT * FROM ${stats_db_name}.publication_datasources UNION ALL SELECT * FROM ${stats_db_name}.software_datasources UNION ALL SELECT * FROM ${stats_db_name}.dataset_datasources UNION ALL SELECT * FROM ${stats_db_name}.otherresearchproduct_datasources;
+-- Union of the per-type *_datasources tables.
+CREATE OR REPLACE VIEW ${stats_db_name}.result_datasources AS
+SELECT * FROM ${stats_db_name}.publication_datasources
+UNION ALL
+SELECT * FROM ${stats_db_name}.software_datasources
+UNION ALL
+SELECT * FROM ${stats_db_name}.dataset_datasources
+UNION ALL
+SELECT * FROM ${stats_db_name}.otherresearchproduct_datasources;
-CREATE OR REPLACE VIEW ${stats_db_name}.result_citations AS SELECT * FROM ${stats_db_name}.publication_citations UNION ALL SELECT * FROM ${stats_db_name}.software_citations UNION ALL SELECT * FROM ${stats_db_name}.dataset_citations UNION ALL SELECT * FROM ${stats_db_name}.otherresearchproduct_citations;
+-- Union of the per-type *_citations tables.
+CREATE OR REPLACE VIEW ${stats_db_name}.result_citations AS
+SELECT * FROM ${stats_db_name}.publication_citations
+UNION ALL
+SELECT * FROM ${stats_db_name}.software_citations
+UNION ALL
+SELECT * FROM ${stats_db_name}.dataset_citations
+UNION ALL
+SELECT * FROM ${stats_db_name}.otherresearchproduct_citations;
-CREATE OR REPLACE VIEW ${stats_db_name}.result_classifications AS SELECT * FROM ${stats_db_name}.publication_classifications UNION ALL SELECT * FROM ${stats_db_name}.software_classifications UNION ALL SELECT * FROM ${stats_db_name}.dataset_classifications UNION ALL SELECT * FROM ${stats_db_name}.otherresearchproduct_classifications;
+-- Union of the per-type *_classifications tables.
+CREATE OR REPLACE VIEW ${stats_db_name}.result_classifications AS
+SELECT * FROM ${stats_db_name}.publication_classifications
+UNION ALL
+SELECT * FROM ${stats_db_name}.software_classifications
+UNION ALL
+SELECT * FROM ${stats_db_name}.dataset_classifications
+UNION ALL
+SELECT * FROM ${stats_db_name}.otherresearchproduct_classifications;
-CREATE OR REPLACE VIEW ${stats_db_name}.result_concepts AS SELECT * FROM ${stats_db_name}.publication_concepts UNION ALL SELECT * FROM ${stats_db_name}.software_concepts UNION ALL SELECT * FROM ${stats_db_name}.dataset_concepts UNION ALL SELECT * FROM ${stats_db_name}.otherresearchproduct_concepts;
+-- Union of the per-type *_concepts tables.
+CREATE OR REPLACE VIEW ${stats_db_name}.result_concepts AS
+SELECT * FROM ${stats_db_name}.publication_concepts
+UNION ALL
+SELECT * FROM ${stats_db_name}.software_concepts
+UNION ALL
+SELECT * FROM ${stats_db_name}.dataset_concepts
+UNION ALL
+SELECT * FROM ${stats_db_name}.otherresearchproduct_concepts;
-CREATE OR REPLACE VIEW ${stats_db_name}.result_languages AS SELECT * FROM ${stats_db_name}.publication_languages UNION ALL SELECT * FROM ${stats_db_name}.software_languages UNION ALL SELECT * FROM ${stats_db_name}.dataset_languages UNION ALL SELECT * FROM ${stats_db_name}.otherresearchproduct_languages;
+-- Union of the per-type *_languages tables.
+CREATE OR REPLACE VIEW ${stats_db_name}.result_languages AS
+SELECT * FROM ${stats_db_name}.publication_languages
+UNION ALL
+SELECT * FROM ${stats_db_name}.software_languages
+UNION ALL
+SELECT * FROM ${stats_db_name}.dataset_languages
+UNION ALL
+SELECT * FROM ${stats_db_name}.otherresearchproduct_languages;
-CREATE OR REPLACE VIEW ${stats_db_name}.result_oids AS SELECT * FROM ${stats_db_name}.publication_oids UNION ALL SELECT * FROM ${stats_db_name}.software_oids UNION ALL SELECT * FROM ${stats_db_name}.dataset_oids UNION ALL SELECT * FROM ${stats_db_name}.otherresearchproduct_oids;
+-- Union of the per-type *_oids tables.
+CREATE OR REPLACE VIEW ${stats_db_name}.result_oids AS
+SELECT * FROM ${stats_db_name}.publication_oids
+UNION ALL
+SELECT * FROM ${stats_db_name}.software_oids
+UNION ALL
+SELECT * FROM ${stats_db_name}.dataset_oids
+UNION ALL
+SELECT * FROM ${stats_db_name}.otherresearchproduct_oids;
-CREATE OR REPLACE VIEW ${stats_db_name}.result_pids AS SELECT * FROM ${stats_db_name}.publication_pids UNION ALL SELECT * FROM ${stats_db_name}.software_pids UNION ALL SELECT * FROM ${stats_db_name}.dataset_pids UNION ALL SELECT * FROM ${stats_db_name}.otherresearchproduct_pids;
+-- Union of the per-type *_pids tables.
+CREATE OR REPLACE VIEW ${stats_db_name}.result_pids AS
+SELECT * FROM ${stats_db_name}.publication_pids
+UNION ALL
+SELECT * FROM ${stats_db_name}.software_pids
+UNION ALL
+SELECT * FROM ${stats_db_name}.dataset_pids
+UNION ALL
+SELECT * FROM ${stats_db_name}.otherresearchproduct_pids;
-CREATE OR REPLACE VIEW ${stats_db_name}.result_topics AS SELECT * FROM ${stats_db_name}.publication_topics UNION ALL SELECT * FROM ${stats_db_name}.software_topics UNION ALL SELECT * FROM ${stats_db_name}.dataset_topics UNION ALL SELECT * FROM ${stats_db_name}.otherresearchproduct_topics;
+-- Union of the per-type *_topics tables.
+CREATE OR REPLACE VIEW ${stats_db_name}.result_topics AS
+SELECT * FROM ${stats_db_name}.publication_topics
+UNION ALL
+SELECT * FROM ${stats_db_name}.software_topics
+UNION ALL
+SELECT * FROM ${stats_db_name}.dataset_topics
+UNION ALL
+SELECT * FROM ${stats_db_name}.otherresearchproduct_topics;
-DROP TABLE IF EXISTS ${stats_db_name}.result_organization;
-CREATE TABLE ${stats_db_name}.result_organization AS SELECT substr(r.target, 4) AS id, substr(r.source, 4) AS organization FROM ${openaire_db_name}.relation r WHERE r.reltype='resultOrganization' and r.datainfo.deletedbyinference=false;
+-- Result/organization pairs read from 'resultOrganization' relations not deleted by inference.
+-- The relation target becomes the result id and the relation source becomes the organization.
+CREATE TABLE ${stats_db_name}.result_organization AS
+SELECT substr(r.target, 4) AS id,
+       substr(r.source, 4) AS organization
+FROM ${openaire_db_name}.relation r
+WHERE r.datainfo.deletedbyinference = FALSE
+  AND r.reltype = 'resultOrganization';
-DROP TABLE IF EXISTS ${stats_db_name}.result_projects;
-CREATE TABLE ${stats_db_name}.result_projects AS select pr.result AS id, pr.id AS project, datediff(p.enddate, p.startdate) AS daysfromend FROM ${stats_db_name}.result r JOIN ${stats_db_name}.project_results pr ON r.id=pr.result JOIN ${stats_db_name}.project_tmp p ON p.id=pr.id;
+-- Result/project pairs for results present in the result view.
+-- daysfromend is filled with datediff(project enddate, project startdate).
+CREATE TABLE ${stats_db_name}.result_projects AS
+SELECT pr.result                         AS id,
+       pr.id                             AS project,
+       datediff(p.enddate, p.startdate)  AS daysfromend
+FROM ${stats_db_name}.result r
+         JOIN ${stats_db_name}.project_results pr ON r.id = pr.result
+         JOIN ${stats_db_name}.project_tmp p ON p.id = pr.id;
+
+-- ANALYZE TABLE ${stats_db_name}.result_organization COMPUTE STATISTICS;
+-- ANALYZE TABLE ${stats_db_name}.result_organization COMPUTE STATISTICS FOR COLUMNS;
+-- ANALYZE TABLE ${stats_db_name}.result_projects COMPUTE STATISTICS;
+-- ANALYZE TABLE ${stats_db_name}.result_projects COMPUTE STATISTICS FOR COLUMNS;
\ No newline at end of file
diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step8.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step8.sql
index 197047c8b..de0fedd7e 100644
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step8.sql
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step8.sql
@@ -5,54 +5,105 @@
-- Datasource table/view and Datasource related tables/views
------------------------------------------------------------
------------------------------------------------------------
-
--- Datasource table creation & update
--------------------------------------
--- Creating and populating temporary datasource table
-DROP TABLE IF EXISTS ${stats_db_name}.datasource_tmp;
-CREATE TABLE ${stats_db_name}.datasource_tmp(`id` string, `name` STRING, `type` STRING, `dateofvalidation` STRING, `yearofvalidation` string, `harvested` BOOLEAN, `piwik_id` INT, `latitude` STRING, `longitude`STRING, `websiteurl` STRING, `compatibility` STRING) CLUSTERED BY (id) INTO 100 buckets stored AS orc tblproperties('transactional'='true');
+-- Temporary datasource table: bucketed on id into 100 buckets and stored as transactional ORC.
+-- The UPDATE statements later in this script run against this table.
+CREATE TABLE ${stats_db_name}.datasource_tmp (
+    `id`               STRING,
+    `name`             STRING,
+    `type`             STRING,
+    `dateofvalidation` STRING,
+    `yearofvalidation` STRING,
+    `harvested`        BOOLEAN,
+    `piwik_id`         INT,
+    `latitude`         STRING,
+    `longitude`        STRING,
+    `websiteurl`       STRING,
+    `compatibility`    STRING
+)
+    CLUSTERED BY (id) INTO 100 BUCKETS
+    STORED AS ORC
+    TBLPROPERTIES ('transactional' = 'true');
-- Insert statement that takes into account the piwik_id of the openAIRE graph
-INSERT INTO ${stats_db_name}.datasource_tmp
-SELECT substr(d1.id, 4) AS id, officialname.value AS name,
-datasourcetype.classname AS type, dateofvalidation.value AS dateofvalidation, date_format(d1.dateofvalidation.value,'yyyy') AS yearofvalidation,
-FALSE AS harvested,
-CASE WHEN d2.piwik_id IS NULL THEN 0 ELSE d2.piwik_id END AS piwik_id,
-d1.latitude.value AS latitude, d1.longitude.value AS longitude,
-d1.websiteurl.value AS websiteurl, d1.openairecompatibility.classid AS compatibility
+-- Fill datasource_tmp from datasources not deleted by inference. piwik_id is the second
+-- ':'-separated field of an original id that starts with "piwik:", or 0 when there is none.
+INSERT INTO ${stats_db_name}.datasource_tmp
+SELECT substr(d1.id, 4)                                          AS id,
+       officialname.value                                        AS name,
+       datasourcetype.classname                                  AS type,
+       dateofvalidation.value                                    AS dateofvalidation,
+       date_format(d1.dateofvalidation.value, 'yyyy')            AS yearofvalidation,
+       FALSE                                                     AS harvested,
+       CASE WHEN d2.piwik_id IS NULL THEN 0 ELSE d2.piwik_id END AS piwik_id,
+       d1.latitude.value                                         AS latitude,
+       d1.longitude.value                                        AS longitude,
+       d1.websiteurl.value                                       AS websiteurl,
+       d1.openairecompatibility.classid                          AS compatibility
FROM ${openaire_db_name}.datasource d1
-LEFT OUTER JOIN
-(SELECT id, split(originalidd, '\\:')[1] as piwik_id
-FROM ${openaire_db_name}.datasource
-LATERAL VIEW EXPLODE(originalid) temp AS originalidd
-WHERE originalidd like "piwik:%") AS d2
-ON d1.id = d2.id
-WHERE d1.datainfo.deletedbyinference=FALSE;
+         LEFT OUTER JOIN (
+             SELECT id,
+                    split(originalidd, '\\:')[1] AS piwik_id
+             FROM ${openaire_db_name}.datasource
+                      LATERAL VIEW EXPLODE(originalid) temp AS originalidd
+             WHERE originalidd LIKE "piwik:%"
+         ) AS d2 ON d1.id = d2.id
+WHERE d1.datainfo.deletedbyinference = FALSE;
-- Updating temporary table with everything that is not based on results -> This is done with the following "dual" table.
-- Creating a temporary dual table that will be removed after the following insert
-CREATE TABLE ${stats_db_name}.dual(dummy CHAR(1));
-INSERT INTO ${stats_db_name}.dual VALUES('X');
-INSERT INTO ${stats_db_name}.datasource_tmp (`id`, `name`, `type`, `dateofvalidation`, `yearofvalidation`, `harvested`, `piwik_id`, `latitude`, `longitude`, `websiteurl`, `compatibility`)
-SELECT 'other', 'Other', 'Repository', NULL, NULL, false, 0, NULL, NULL, NULL, 'unknown' FROM ${stats_db_name}.dual WHERE 'other' not in (SELECT id FROM ${stats_db_name}.datasource_tmp WHERE name='Unknown Repository');
+-- Single-row helper table; it only serves as the FROM source of the insert that follows.
+CREATE TABLE ${stats_db_name}.dual (dummy CHAR(1));
+INSERT INTO ${stats_db_name}.dual VALUES ('X');
+-- Add the catch-all 'other' datasource unless a row named 'Unknown Repository' already has that id.
+INSERT INTO ${stats_db_name}.datasource_tmp
+    (`id`, `name`, `type`, `dateofvalidation`, `yearofvalidation`, `harvested`,
+     `piwik_id`, `latitude`, `longitude`, `websiteurl`, `compatibility`)
+SELECT 'other', 'Other', 'Repository', NULL, NULL, FALSE, 0, NULL, NULL, NULL, 'unknown'
+FROM ${stats_db_name}.dual
+WHERE 'other' NOT IN (SELECT id FROM ${stats_db_name}.datasource_tmp WHERE name = 'Unknown Repository');
DROP TABLE ${stats_db_name}.dual;
-UPDATE ${stats_db_name}.datasource_tmp SET name='Other' WHERE name='Unknown Repository';
-UPDATE ${stats_db_name}.datasource_tmp SET yearofvalidation=null WHERE yearofvalidation='-1';
+-- Rename 'Unknown Repository' to 'Other', then replace the yearofvalidation value '-1' with NULL.
+UPDATE ${stats_db_name}.datasource_tmp SET name = 'Other' WHERE name = 'Unknown Repository';
+UPDATE ${stats_db_name}.datasource_tmp SET yearofvalidation = NULL WHERE yearofvalidation = '-1';
-DROP TABLE IF EXISTS ${stats_db_name}.datasource_languages;
-CREATE TABLE ${stats_db_name}.datasource_languages AS SELECT substr(d.id, 4) AS id, langs.languages AS language FROM ${openaire_db_name}.datasource d LATERAL VIEW explode(d.odlanguages.value) langs AS languages;
+-- One row per (datasource, language), exploded from odlanguages.value.
+CREATE TABLE ${stats_db_name}.datasource_languages AS
+SELECT substr(d.id, 4) AS id,
+       langs.languages AS language
+FROM ${openaire_db_name}.datasource d
+         LATERAL VIEW explode(d.odlanguages.value) langs AS languages;
-DROP TABLE IF EXISTS ${stats_db_name}.datasource_oids;
-CREATE TABLE ${stats_db_name}.datasource_oids AS SELECT substr(d.id, 4) AS id, oids.ids AS oid FROM ${openaire_db_name}.datasource d LATERAL VIEW explode(d.originalid) oids AS ids;
+-- One row per (datasource, original id), exploded from originalid.
+CREATE TABLE ${stats_db_name}.datasource_oids AS
+SELECT substr(d.id, 4) AS id,
+       oids.ids        AS oid
+FROM ${openaire_db_name}.datasource d
+         LATERAL VIEW explode(d.originalid) oids AS ids;
-DROP TABLE IF EXISTS ${stats_db_name}.datasource_organizations;
-CREATE TABLE ${stats_db_name}.datasource_organizations AS SELECT substr(r.target, 4) AS id, substr(r.source, 4) AS organization FROM ${openaire_db_name}.relation r WHERE r.reltype='datasourceOrganization' and r.datainfo.deletedbyinference=false;
+-- Datasource/organization pairs read from 'datasourceOrganization' relations not deleted by inference.
+-- The relation target becomes the datasource id and the relation source becomes the organization.
+CREATE TABLE ${stats_db_name}.datasource_organizations AS
+SELECT substr(r.target, 4) AS id,
+       substr(r.source, 4) AS organization
+FROM ${openaire_db_name}.relation r
+WHERE r.datainfo.deletedbyinference = FALSE
+  AND r.reltype = 'datasourceOrganization';
-- datasource sources:
-- where the datasource info have been collected from.
-create table if not exists ${stats_db_name}.datasource_sources AS select substr(d.id,4) as id, substr(cf.key, 4) as datasource from ${openaire_db_name}.datasource d lateral view explode(d.collectedfrom) cfrom as cf where d.datainfo.deletedbyinference=false;
-
-CREATE OR REPLACE VIEW ${stats_db_name}.datasource_results AS SELECT datasource AS id, id AS result FROM ${stats_db_name}.result_datasources;
+-- One row per (datasource, collectedfrom entry) for datasources not deleted by inference;
+-- the statement is skipped when the table already exists.
+CREATE TABLE IF NOT EXISTS ${stats_db_name}.datasource_sources AS
+SELECT substr(d.id, 4)   AS id,
+       substr(cf.key, 4) AS datasource
+FROM ${openaire_db_name}.datasource d
+         LATERAL VIEW explode(d.collectedfrom) cfrom AS cf
+WHERE d.datainfo.deletedbyinference = FALSE;
+-- result_datasources with its columns swapped: the datasource becomes id, the result id becomes result.
+CREATE OR REPLACE VIEW ${stats_db_name}.datasource_results AS
+SELECT datasource AS id,
+       id         AS result
+FROM ${stats_db_name}.result_datasources;
+-- ANALYZE TABLE ${stats_db_name}.datasource_tmp COMPUTE STATISTICS;
+-- ANALYZE TABLE ${stats_db_name}.datasource_tmp COMPUTE STATISTICS FOR COLUMNS;
+-- ANALYZE TABLE ${stats_db_name}.datasource_languages COMPUTE STATISTICS;
+-- ANALYZE TABLE ${stats_db_name}.datasource_languages COMPUTE STATISTICS FOR COLUMNS;
+-- ANALYZE TABLE ${stats_db_name}.datasource_oids COMPUTE STATISTICS;
+-- ANALYZE TABLE ${stats_db_name}.datasource_oids COMPUTE STATISTICS FOR COLUMNS;
+-- ANALYZE TABLE ${stats_db_name}.datasource_organizations COMPUTE STATISTICS;
+-- ANALYZE TABLE ${stats_db_name}.datasource_organizations COMPUTE STATISTICS FOR COLUMNS;
\ No newline at end of file
diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step9.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step9.sql
index a918e4de4..a1cb46185 100644
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step9.sql
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step9.sql
@@ -3,10 +3,21 @@
-- Organization table/view and Organization related tables/views
----------------------------------------------------------------
----------------------------------------------------------------
-DROP TABLE IF EXISTS ${stats_db_name}.organization;
-CREATE TABLE IF NOT EXISTS ${stats_db_name}.organization AS SELECT substr(o.id, 4) as id, o.legalname.value as name, o.legalshortname.value as legalshortname, o.country.classid as country
-FROM ${openaire_db_name}.organization o WHERE o.datainfo.deletedbyinference=FALSE;
+-- Organizations not deleted by inference; the statement is skipped when the table already exists.
+CREATE TABLE IF NOT EXISTS ${stats_db_name}.organization AS
+SELECT substr(o.id, 4)        AS id,
+       o.legalname.value      AS name,
+       o.legalshortname.value AS legalshortname,
+       o.country.classid      AS country
+FROM ${openaire_db_name}.organization o
+WHERE o.datainfo.deletedbyinference = FALSE;
-CREATE OR REPLACE VIEW ${stats_db_name}.organization_datasources AS SELECT organization AS id, id AS datasource FROM ${stats_db_name}.datasource_organizations;
+-- datasource_organizations with its columns swapped: the organization becomes id, the datasource id becomes datasource.
+CREATE OR REPLACE VIEW ${stats_db_name}.organization_datasources AS
+SELECT organization AS id,
+       id           AS datasource
+FROM ${stats_db_name}.datasource_organizations;
-CREATE OR REPLACE VIEW ${stats_db_name}.organization_projects AS SELECT id AS project, organization as id FROM ${stats_db_name}.project_organizations;
+-- project_organizations with its columns renamed: the project id becomes project, the organization becomes id.
+CREATE OR REPLACE VIEW ${stats_db_name}.organization_projects AS
+SELECT id           AS project,
+       organization AS id
+FROM ${stats_db_name}.project_organizations;
+
+-- ANALYZE TABLE ${stats_db_name}.organization COMPUTE STATISTICS;
+-- ANALYZE TABLE ${stats_db_name}.organization COMPUTE STATISTICS FOR COLUMNS;
\ No newline at end of file
diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/updateCache.sh b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/updateCache.sh
index 36e74a556..dc19f84b4 100644
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/updateCache.sh
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/updateCache.sh
@@ -1,4 +1,4 @@
#!/usr/bin/env bash
curl --request GET $1/cache/updateCache
-
+# Pause for 20 hours after the cache-update request; the "h" unit suffix is a GNU sleep extension.
+# NOTE(review): presumably this keeps the action running while the stats tool rebuilds its cache - confirm.
+sleep 20h
\ No newline at end of file
diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml
index ceaa47794..9c16f149d 100644
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml
@@ -17,6 +17,14 @@
stats_db_shadow_name
the name of the shadow schema
+
+ monitor_db_name
+ the target monitor db name
+
+
+ monitor_db_shadow_name
+ the name of the shadow monitor db
+
stats_tool_api_url
The url of the API of the stats tool. Is used to trigger the cache update.
@@ -252,31 +260,6 @@
stats_db_name=${stats_db_name}
openaire_db_name=${openaire_db_name}
-
-
-
-
-
-
- ${hive_jdbc_url}
-
- stats_db_name=${stats_db_name}
- stats_db_shadow_name=${stats_db_shadow_name}
-
-
-
-
-
-
-
- ${jobTracker}
- ${nameNode}
- impala-shell.sh
- ${stats_db_name}
- step18.sql
- ${wf:appPath()}/scripts/step18.sql
- impala-shell.sh
-
@@ -285,17 +268,31 @@
${jobTracker}
${nameNode}
- impala-shell.sh
+ finalizedb.sh
+ ${stats_db_name}
${stats_db_shadow_name}
- step19.sql
- ${wf:appPath()}/scripts/step19.sql
- impala-shell.sh
+ finalizedb.sh
-
+
-
+
+
+ ${jobTracker}
+ ${nameNode}
+ monitor.sh
+ ${stats_db_name}
+ ${monitor_db_name}
+ ${monitor_db_shadow_name}
+ ${wf:appPath()}/scripts/step20-createMonitorDB.sql
+ monitor.sh
+
+
+
+
+
+
${jobTracker}
${nameNode}