diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/copyDataToImpalaCluster.sh b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/copyDataToImpalaCluster.sh
index 5b6752398..843877c90 100644
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/copyDataToImpalaCluster.sh
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/copyDataToImpalaCluster.sh
@@ -14,7 +14,7 @@ function copydb() {
# copy the databases from ocean to impala
- #echo "copying $db"
+ echo "copying $db"
hadoop distcp -Dmapreduce.map.memory.mb=6144 -pb hdfs://nameservice1/user/hive/warehouse/${db}.db hdfs://impala-cluster-mn1.openaire.eu:8020/tmp
# change ownership to impala
diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/indicators.sh b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/indicators.sh
index 473864315..2f1eefa0c 100644
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/indicators.sh
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/indicators.sh
@@ -7,7 +7,9 @@ then
fi
export TARGET=$1
-export SCRIPT_PATH=$2
+export STATS_EXT=$2
+export SCRIPT_PATH=$3
+
export HIVE_OPTS="-hiveconf mapred.job.queue.name=analytics -hiveconf hive.spark.client.connect.timeout=120000ms -hiveconf hive.spark.client.server.connect.timeout=300000ms -hiveconf spark.executor.memory=19166291558 -hiveconf spark.yarn.executor.memoryOverhead=3225 -hiveconf spark.driver.memory=11596411699 -hiveconf spark.yarn.driver.memoryOverhead=1228 -hiveconf hive.auto.convert.join=false"
export HADOOP_USER_NAME="oozie"
@@ -15,7 +17,10 @@ echo "Getting file from " $SCRIPT_PATH
hdfs dfs -copyToLocal $SCRIPT_PATH
echo "Creating indicators"
hive $HIVE_OPTS --database ${TARGET} -e "show tables" | grep -v WARN | sed "s/^\(.*\)/analyze table ${TARGET}.\1 compute statistics;/" > foo
hive $HIVE_OPTS -f foo
-hive $HIVE_OPTS --database ${TARGET} -f step16-createIndicatorsTables.sql
+# The STATS_EXT placeholder lives in the SQL script, not in the "show tables"
+# output above, so it has to be resolved in the script before hive runs it.
+sed "s/STATS_EXT/${STATS_EXT}/g" step16-createIndicatorsTables.sql > step16-createIndicatorsTables.resolved.sql
+hive $HIVE_OPTS --database ${TARGET} -f step16-createIndicatorsTables.resolved.sql
echo "Indicators created"
diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15_5.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15_5.sql
index 1ae856355..61c0726ff 100644
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15_5.sql
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15_5.sql
@@ -29,17 +29,17 @@ select rcount.pid, sum(case when rcount.type='publication' then rcount.count els
from rcount
group by rcount.pid;
-create view ${stats_db_name}.rndexpenditure as select * from stats_ext.rndexpediture;
-create view ${stats_db_name}.rndgdpexpenditure as select * from stats_ext.rndgdpexpenditure;
-create view ${stats_db_name}.doctoratestudents as select * from stats_ext.doctoratestudents;
-create view ${stats_db_name}.totalresearchers as select * from stats_ext.totalresearchers;
-create view ${stats_db_name}.totalresearchersft as select * from stats_ext.totalresearchersft;
-create view ${stats_db_name}.hrrst as select * from stats_ext.hrrst;
+create view ${stats_db_name}.rndexpenditure as select * from ${external_stats_db_name}.rndexpediture;
+create view ${stats_db_name}.rndgdpexpenditure as select * from ${external_stats_db_name}.rndgdpexpenditure;
+create view ${stats_db_name}.doctoratestudents as select * from ${external_stats_db_name}.doctoratestudents;
+create view ${stats_db_name}.totalresearchers as select * from ${external_stats_db_name}.totalresearchers;
+create view ${stats_db_name}.totalresearchersft as select * from ${external_stats_db_name}.totalresearchersft;
+create view ${stats_db_name}.hrrst as select * from ${external_stats_db_name}.hrrst;
create table ${stats_db_name}.result_instance stored as parquet as
select distinct r.*
from (
- select substr(r.id, 4) as id, inst.accessright.classname as accessright, substr(inst.collectedfrom.key, 4) as collectedfrom,
+ select substr(r.id, 4) as id, inst.accessright.classname as accessright, inst.accessright.openaccessroute as accessright_uw, substr(inst.collectedfrom.key, 4) as collectedfrom,
substr(inst.hostedby.key, 4) as hostedby, inst.dateofacceptance.value as dateofacceptance, inst.license.value as license, p.qualifier.classname as pidtype, p.value as pid
from ${openaire_db_name}.result r lateral view explode(r.instance) instances as inst lateral view explode(inst.pid) pids as p) r
join ${stats_db_name}.result res on res.id=r.id;
@@ -52,4 +52,4 @@ from (
join ${stats_db_name}.result res on res.id=r.id
where r.amount is not null;
-create view ${stats_db_name}.issn_gold_oa_dataset as select * from stats_ext.issn_gold_oa_dataset;
\ No newline at end of file
+create view ${stats_db_name}.issn_gold_oa_dataset as select * from ${external_stats_db_name}.issn_gold_oa_dataset;
\ No newline at end of file
diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql
index ac4d4202a..4fd941e5d 100755
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql
@@ -180,7 +180,7 @@ from publication_datasources pd
left outer join (
select pd.id, 1 as in_diamond_journal from publication_datasources pd
join datasource d on d.id=pd.datasource
- join stats_ext.plan_s_jn ps where (ps.issn_print=d.issn_printed and ps.issn_online=d.issn_online)
+ join STATS_EXT.plan_s_jn ps where (ps.issn_print=d.issn_printed and ps.issn_online=d.issn_online)
and (ps.journal_is_in_doaj=true or ps.journal_is_oa=true) and ps.has_apc=false) tmp
on pd.id=tmp.id;
@@ -192,7 +192,7 @@ from publication pd
left outer join (
select pd.id, 1 as is_transformative from publication_datasources pd
join datasource d on d.id=pd.datasource
- join stats_ext.plan_s_jn ps where (ps.issn_print=d.issn_printed and ps.issn_online=d.issn_online)
+ join STATS_EXT.plan_s_jn ps where (ps.issn_print=d.issn_printed and ps.issn_online=d.issn_online)
and ps.is_transformative_journal=true) tmp
on pd.id=tmp.id;
@@ -220,11 +220,11 @@ ANALYZE TABLE indi_result_no_of_copies COMPUTE STATISTICS;
create table if not exists indi_pub_hybrid_oa_with_cc stored as parquet as
WITH hybrid_oa AS (
SELECT issn_l, journal_is_in_doaj, journal_is_oa, issn_print as issn
- FROM stats_ext.plan_s_jn
+ FROM STATS_EXT.plan_s_jn
WHERE issn_print != ""
UNION ALL
SELECT issn_l, journal_is_in_doaj, journal_is_oa, issn_online as issn
- FROM stats_ext.plan_s_jn
+ FROM STATS_EXT.plan_s_jn
WHERE issn_online != "" and (journal_is_in_doaj = FALSE OR journal_is_oa = FALSE)),
issn AS (
SELECT *
@@ -291,7 +291,7 @@ create table if not exists indi_pub_gold_oa stored as parquet as
journal_is_oa,
issn_1 as issn
FROM
- stats_ext.oa_journals
+ STATS_EXT.oa_journals
WHERE
issn_1 != ""
UNION
@@ -301,7 +301,7 @@ create table if not exists indi_pub_gold_oa stored as parquet as
journal_is_oa,
issn_2 as issn
FROM
- stats_ext.oa_journals
+ STATS_EXT.oa_journals
WHERE
issn_2 != "" ), issn AS ( SELECT
*
@@ -343,7 +343,7 @@ create table if not exists indi_pub_hybrid stored as parquet as
issn_1 as issn,
has_apc
FROM
- stats_ext.oa_journals
+ STATS_EXT.oa_journals
WHERE
issn_1 != ""
UNION
@@ -354,7 +354,7 @@ create table if not exists indi_pub_hybrid stored as parquet as
issn_2 as issn,
has_apc
FROM
- stats_ext.oa_journals
+ STATS_EXT.oa_journals
WHERE
issn_2 != "" ), issn AS ( SELECT
*
diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml
index e23bd0aa3..e9453d7b1 100644
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml
@@ -108,6 +108,7 @@
${wf:conf('resumeFrom') eq 'step21-createObservatoryDB-post'}
${wf:conf('resumeFrom') eq 'step22-copyDataToImpalaCluster'}
${wf:conf('resumeFrom') eq 'step23-finalizeImpalaCluster'}
+ ${wf:conf('resumeFrom') eq 'Step24-updateCache'}
@@ -289,6 +290,7 @@
stats_db_name=${stats_db_name}
openaire_db_name=${openaire_db_name}
+ external_stats_db_name=${external_stats_db_name}
@@ -313,6 +315,7 @@
${nameNode}
indicators.sh
${stats_db_name}
+ ${external_stats_db_name}
${wf:appPath()}/scripts/step16-createIndicatorsTables.sql
indicators.sh
@@ -452,21 +455,21 @@
${observatory_db_shadow_name}
finalizeImpalaCluster.sh
+
+
+
+
+
+
+ ${jobTracker}
+ ${nameNode}
+ updateCache.sh
+ ${stats_tool_api_url}
+ updateCache.sh
+
-
-
-
-
-
-
-
-
-
-
-
-