dnet-hadoop/dhp-workflows/dhp-indicators/src/main/resources/eu/dnetlib/dhp/oa/graph/indicators/oozie_app/indicators.sh

30 lines
1.3 KiB
Bash
Raw Normal View History

2022-11-03 14:55:27 +01:00
# Impala-shell's embedded Python unpacks .egg files into a per-user cache;
# point it at the invoking user's home directory.
export PYTHON_EGG_CACHE=/home/$(whoami)/.python-eggs
export link_folder=/tmp/impala-shell-python-egg-cache-$(whoami)
# If the /tmp path is not already a symlink, wipe whatever occupies it and
# link it into the egg cache. NOTE(review): the link target is the literal
# concatenation ${PYTHON_EGG_CACHE}${link_folder} (e.g.
# /home/user/.python-eggs/tmp/impala-shell-...), mirroring the sibling
# stats workflows — presumably intentional; confirm before changing.
# Fix: quote $link_folder everywhere so the test and ln calls cannot
# word-split or glob (SC2086).
if ! [ -L "$link_folder" ]
then
rm -Rf "$link_folder"
ln -sfn "${PYTHON_EGG_CACHE}${link_folder}" "${link_folder}"
fi
2023-01-05 09:37:33 +01:00
# Positional parameters passed in by the Oozie workflow action:
#   $1 = source stats database, $2 = target indicators database,
#   $3 = HDFS path of the indicator SQL script.
export SOURCE="$1" TARGET="$2" SCRIPT_PATH="$3"
2022-11-03 14:55:27 +01:00
export HIVE_OPTS="-hiveconf mapred.job.queue.name=analytics -hiveconf hive.spark.client.connect.timeout=120000ms -hiveconf hive.spark.client.server.connect.timeout=300000ms -hiveconf spark.executor.memory=4831838208 -hiveconf spark.yarn.executor.memoryOverhead=450"
2023-01-05 09:37:33 +01:00
export HADOOP_USER_NAME="oozie"
2022-11-03 14:55:27 +01:00
# Copy the indicator SQL script from HDFS into the local working directory.
# Fix: quote $SCRIPT_PATH so a path containing spaces or glob characters
# is passed as a single argument (SC2086); output is unchanged.
echo "Getting file from " "$SCRIPT_PATH"
hdfs dfs -copyToLocal "$SCRIPT_PATH"
#hive -e "create database if not exists ${TARGET}"
echo "Creating indicators"
2023-01-05 09:37:33 +01:00
hive -e "create database if not exists ${TARGET}"
2022-11-03 14:55:27 +01:00
#hive $HIVE_OPTS --database ${TARGET} -e "show tables" | grep -v WARN | sed "s/^\(.*\)/analyze table ${TARGET}.\1 compute statistics;/" > foo
#hive $HIVE_OPTS -f foo
2023-01-05 09:37:33 +01:00
#hive $HIVE_OPTS --database ${SOURCE} -f Step1.sql
2022-11-03 14:55:27 +01:00
#cat Step1.sql > foo
2023-01-05 09:37:33 +01:00
#cat Step1.sql | sed s/TARGET/$TARGET/g | sed s/SOURCE/$SOURCE/g1 > foo
#cat Step1.sql | sed "s/TARGET/openaire_beta_indicators_20221216/g | sed s/SOURCE/openaire_beta_stats_20220930_dimhive10/g1" > foo
# Instantiate the SQL template: replace the TARGET and SOURCE placeholders
# with the actual database names, then execute the result.
# Fix: fold `cat | sed | sed` into a single sed invocation with two
# expressions (avoids a useless cat and an extra process).
# NOTE(review): sed substitution breaks if ${TARGET}/${SOURCE} ever contain
# '/' or '&' — database names are assumed to be plain identifiers.
sed -e "s/TARGET/${TARGET}/g" -e "s/SOURCE/${SOURCE}/g" Step1.sql > foo
hive -f foo
2022-11-03 14:55:27 +01:00
echo "Indicators created"