Updates to the steps that transfer data to the Impala cluster
1. Removed the external table definitions in stats_ext. 2. Fixed the issue where some views were not created. 3. Added two workflow parameters so that the usage stats DBs are also copied.
This commit is contained in:
parent
b3f9633205
commit
86f4f63daf
|
@ -8,7 +8,7 @@ fi
|
||||||
|
|
||||||
#export HADOOP_USER_NAME="dimitris.pierrakos"
|
#export HADOOP_USER_NAME="dimitris.pierrakos"
|
||||||
export HADOOP_USER_NAME=$5
|
export HADOOP_USER_NAME=$5
|
||||||
|
export PROD_USAGE_STATS_DB="openaire_prod_usage_stats"
|
||||||
function copydb() {
|
function copydb() {
|
||||||
db=$1
|
db=$1
|
||||||
FILE=("hive_wf_tmp_"$RANDOM)
|
FILE=("hive_wf_tmp_"$RANDOM)
|
||||||
|
@ -27,16 +27,23 @@ function copydb() {
|
||||||
|
|
||||||
impala-shell --user $HADOOP_USER_NAME -q "INVALIDATE METADATA"
|
impala-shell --user $HADOOP_USER_NAME -q "INVALIDATE METADATA"
|
||||||
echo "creating schema for ${db}"
|
echo "creating schema for ${db}"
|
||||||
|
for (( k = 0; k < 5; k ++ )); do
|
||||||
for i in `impala-shell --user $HADOOP_USER_NAME -d ${db} --delimited -q "show tables"`;
|
for i in `impala-shell --user $HADOOP_USER_NAME -d ${db} --delimited -q "show tables"`;
|
||||||
do
|
do
|
||||||
impala-shell --user $HADOOP_USER_NAME -d ${db} --delimited -q "show create table $i";
|
impala-shell --user $HADOOP_USER_NAME -d ${db} --delimited -q "show create table $i";
|
||||||
done | sed 's/"$/;/' | sed 's/^"//' | sed 's/[[:space:]]\date[[:space:]]/`date`/g' | impala-shell --user $HADOOP_USER_NAME -i impala-cluster-dn1.openaire.eu -c -f -
|
done | sed 's/"$/;/' | sed 's/^"//' | sed 's/[[:space:]]\date[[:space:]]/`date`/g' | impala-shell --user $HADOOP_USER_NAME -i impala-cluster-dn1.openaire.eu -c -f -
|
||||||
|
done
|
||||||
|
|
||||||
# run the same command twice because we may have failures in the first run (due to views pointing to the same db)
|
# for i in `impala-shell --user $HADOOP_USER_NAME -d ${db} --delimited -q "show tables"`;
|
||||||
for i in `impala-shell --user $HADOOP_USER_NAME -d ${db} --delimited -q "show tables"`;
|
# do
|
||||||
do
|
# impala-shell --user $HADOOP_USER_NAME -d ${db} --delimited -q "show create table $i";
|
||||||
impala-shell --user $HADOOP_USER_NAME -d ${db} --delimited -q "show create table $i";
|
# done | sed 's/"$/;/' | sed 's/^"//' | sed 's/[[:space:]]\date[[:space:]]/`date`/g' | impala-shell --user $HADOOP_USER_NAME -i impala-cluster-dn1.openaire.eu -c -f -
|
||||||
done | sed 's/"$/;/' | sed 's/^"//' | sed 's/[[:space:]]\date[[:space:]]/`date`/g' | impala-shell --user $HADOOP_USER_NAME -i impala-cluster-dn1.openaire.eu -c -f -
|
#
|
||||||
|
# # run the same command twice because we may have failures in the first run (due to views pointing to the same db)
|
||||||
|
# for i in `impala-shell --user $HADOOP_USER_NAME -d ${db} --delimited -q "show tables"`;
|
||||||
|
# do
|
||||||
|
# impala-shell --user $HADOOP_USER_NAME -d ${db} --delimited -q "show create table $i";
|
||||||
|
# done | sed 's/"$/;/' | sed 's/^"//' | sed 's/[[:space:]]\date[[:space:]]/`date`/g' | impala-shell --user $HADOOP_USER_NAME -i impala-cluster-dn1.openaire.eu -c -f -
|
||||||
|
|
||||||
# load the data from /tmp in the respective tables
|
# load the data from /tmp in the respective tables
|
||||||
echo "copying data in tables and computing stats"
|
echo "copying data in tables and computing stats"
|
||||||
|
@ -54,8 +61,11 @@ STATS_DB=$1
|
||||||
MONITOR_DB=$2
|
MONITOR_DB=$2
|
||||||
OBSERVATORY_DB=$3
|
OBSERVATORY_DB=$3
|
||||||
EXT_DB=$4
|
EXT_DB=$4
|
||||||
HADOOP_USER_NAME=$5
|
USAGE_STATS_DB=$5
|
||||||
|
HADOOP_USER_NAME=$6
|
||||||
|
|
||||||
|
copydb $USAGE_STATS_DB
|
||||||
|
copydb $PROD_USAGE_STATS_DB
|
||||||
copydb $EXT_DB
|
copydb $EXT_DB
|
||||||
copydb $STATS_DB
|
copydb $STATS_DB
|
||||||
copydb $MONITOR_DB
|
copydb $MONITOR_DB
|
||||||
|
|
|
@ -22,7 +22,10 @@ MONITOR_DB=$3
|
||||||
MONITOR_DB_SHADOW=$4
|
MONITOR_DB_SHADOW=$4
|
||||||
OBSERVATORY_DB=$5
|
OBSERVATORY_DB=$5
|
||||||
OBSERVATORY_DB_SHADOW=$6
|
OBSERVATORY_DB_SHADOW=$6
|
||||||
|
USAGE_STATS_DB=$7
|
||||||
|
USAGE_STATS_DB_SHADOW=$8
|
||||||
|
|
||||||
createShadowDB $STATS_DB $STATS_DB_SHADOW
|
createShadowDB $STATS_DB $STATS_DB_SHADOW
|
||||||
createShadowDB $MONITOR_DB $MONITOR_DB_SHADOW
|
createShadowDB $MONITOR_DB $MONITOR_DB_SHADOW
|
||||||
createShadowDB $OBSERVATORY_DB $OBSERVATORY_DB_SHADOW
|
createShadowDB $OBSERVATORY_DB $OBSERVATORY_DB_SHADOW
|
||||||
|
# Create the shadow DB for the usage stats database. The variables must be
# expanded with `$`; bare names would pass the literal strings
# "USAGE_STATS_DB" / "USAGE_STATS_DB_SHADOW" instead of the values from $7/$8.
createShadowDB $USAGE_STATS_DB $USAGE_STATS_DB_SHADOW
|
||||||
|
|
|
@ -12,6 +12,10 @@
|
||||||
<name>external_stats_db_name</name>
|
<name>external_stats_db_name</name>
|
||||||
<description>the external stats that should be added since they are not included in the graph database</description>
|
<description>the external stats that should be added since they are not included in the graph database</description>
|
||||||
</property>
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>usage_stats_db_name</name>
|
||||||
|
<description>the usage statistics database name</description>
|
||||||
|
</property>
|
||||||
<property>
|
<property>
|
||||||
<name>stats_db_shadow_name</name>
|
<name>stats_db_shadow_name</name>
|
||||||
<description>the name of the shadow schema</description>
|
<description>the name of the shadow schema</description>
|
||||||
|
@ -32,6 +36,10 @@
|
||||||
<name>observatory_db_shadow_name</name>
|
<name>observatory_db_shadow_name</name>
|
||||||
<description>the name of the shadow monitor db</description>
|
<description>the name of the shadow monitor db</description>
|
||||||
</property>
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>usage_stats_db_shadow_name</name>
|
||||||
|
<description>the name of the shadow usage stats db</description>
|
||||||
|
</property>
|
||||||
<property>
|
<property>
|
||||||
<name>stats_tool_api_url</name>
|
<name>stats_tool_api_url</name>
|
||||||
<description>The url of the API of the stats tool. Is used to trigger the cache update.</description>
|
<description>The url of the API of the stats tool. Is used to trigger the cache update.</description>
|
||||||
|
@ -434,6 +442,7 @@
|
||||||
<argument>${monitor_db_name}</argument>
|
<argument>${monitor_db_name}</argument>
|
||||||
<argument>${observatory_db_name}</argument>
|
<argument>${observatory_db_name}</argument>
|
||||||
<argument>${external_stats_db_name}</argument>
|
<argument>${external_stats_db_name}</argument>
|
||||||
|
<argument>${usage_stats_db_name}</argument>
|
||||||
<argument>${hadoop_user_name}</argument>
|
<argument>${hadoop_user_name}</argument>
|
||||||
<file>copyDataToImpalaCluster.sh</file>
|
<file>copyDataToImpalaCluster.sh</file>
|
||||||
</shell>
|
</shell>
|
||||||
|
@ -452,6 +461,8 @@
|
||||||
<argument>${monitor_db_shadow_name}</argument>
|
<argument>${monitor_db_shadow_name}</argument>
|
||||||
<argument>${observatory_db_name}</argument>
|
<argument>${observatory_db_name}</argument>
|
||||||
<argument>${observatory_db_shadow_name}</argument>
|
<argument>${observatory_db_shadow_name}</argument>
|
||||||
|
<argument>${usage_stats_db_name}</argument>
|
||||||
|
<argument>${usage_stats_db_shadow_name}</argument>
|
||||||
<file>finalizeImpalaCluster.sh</file>
|
<file>finalizeImpalaCluster.sh</file>
|
||||||
</shell>
|
</shell>
|
||||||
<ok to="Step24-updateCache"/>
|
<ok to="Step24-updateCache"/>
|
||||||
|
|
Loading…
Reference in New Issue