From 2754c3dd62e2c7bf60ebd00cdc7a0ec9ab9a80c1 Mon Sep 17 00:00:00 2001 From: Antonis Lempesis Date: Mon, 13 Dec 2021 16:26:14 +0200 Subject: [PATCH] moving data to impala cluster and creating shadow databases there --- .../oa/graph/stats/oozie_app/finalizedb.sh | 8 +---- .../dhp/oa/graph/stats/oozie_app/monitor.sh | 10 +----- .../graph/stats/oozie_app/observatory-post.sh | 10 +----- .../dhp/oa/graph/stats/oozie_app/workflow.xml | 36 +++++++++++++++++-- 4 files changed, 37 insertions(+), 27 deletions(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/finalizedb.sh b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/finalizedb.sh index 60771dfa7..9de472955 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/finalizedb.sh +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/finalizedb.sh @@ -11,10 +11,4 @@ export SHADOW=$2 echo "Updating shadow database" hive --database ${SOURCE} -e "show tables" | grep -v WARN | sed "s/^\(.*\)/analyze table ${SOURCE}.\1 compute statistics;/" > foo -hive -f foo -hive -e "create database if not exists ${SHADOW}" -hive --database ${SHADOW} -e "show tables" | grep -v WARN | sed "s/^/drop view if exists ${SHADOW}./" | sed "s/$/;/" > foo -hive -f foo -hive --database ${SOURCE} -e "show tables" | grep -v WARN | sed "s/\(.*\)/create view ${SHADOW}.\1 as select * from ${SOURCE}.\1;/" > foo -hive -f foo -echo "Shadow db ready!" \ No newline at end of file +hive -f foo \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/monitor.sh b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/monitor.sh index 37809652d..a4e7eec57 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/monitor.sh +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/monitor.sh @@ -17,12 +17,4 @@ hdfs dfs -copyToLocal $4 echo "Creating monitor database" cat step20-createMonitorDB.sql | sed s/SOURCE/$1/g | sed s/TARGET/$2/g1 > foo hive -f foo -echo "Impala shell finished" - -echo "Updating shadow monitor database" -hive -e "create database if not exists ${SHADOW}" -hive --database ${SHADOW} -e "show tables" | grep -v WARN | sed "s/^/drop view if exists ${SHADOW}./" | sed "s/$/;/" > foo -hive -f foo -hive --database ${TARGET} -e "show tables" | grep -v WARN | sed "s/\(.*\)/create view ${SHADOW}.\1 as select * from ${TARGET}.\1;/" > foo -hive -f foo -echo "Shadow db ready!" \ No newline at end of file +echo "Impala shell finished" \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/observatory-post.sh b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/observatory-post.sh index d074e6a55..12315c9e8 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/observatory-post.sh +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/observatory-post.sh @@ -12,12 +12,4 @@ export SHADOW=$3 hive --database ${TARGET} -e "show tables" | grep -v WARN | sed "s/\(.*\)/analyze table ${TARGET}.\1 compute statistics;/" > foo hive -f foo -echo "Impala shell finished" - -echo "Updating shadow observatory database" -hive -e "create database if not exists ${SHADOW}" -hive --database ${SHADOW} -e "show tables" | grep -v WARN | sed "s/^/drop view if exists ${SHADOW}./" | sed "s/$/;/" > foo -hive -f foo -hive -d ${TARGET} -e "show tables" | grep -v WARN | sed "s/\(.*\)/create view ${SHADOW}.\1 as select * from ${TARGET}.\1;/" > foo -hive -f foo -echo "Shadow db ready!" \ No newline at end of file +echo "Impala shell finished" \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml index 08d33f4e8..7ac3cefbb 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml @@ -365,11 +365,43 @@ ${observatory_db_shadow_name} observatory-post.sh - + - + + + ${jobTracker} + ${nameNode} + copyDataToImpalaCluster.sh + ${external_stats_db_name} + ${stats_db_name} + ${monitor_db_name} + ${observatory_db_name} + copyDataToImpalaCluster.sh + + + + + + + + ${jobTracker} + ${nameNode} + finalizeImpalaCluster.sh + ${stats_db_name} + ${stats_db_shadow_name} + ${monitor_db_name} + ${monitor_db_shadow_name} + ${observatory_db_name} + ${observatory_db_shadow_name} + finalizeImpalaCluster.sh + + + + + + ${jobTracker} ${nameNode}