From 7a7eb30a3f3cfef1d1c6ce940d1579348c8e848b Mon Sep 17 00:00:00 2001
From: dimitrispie
Date: Thu, 5 Jan 2023 10:37:33 +0200
Subject: [PATCH] Changes 05012023

---
 .../graph/indicators/oozie_app/config-default.xml |  2 +-
 .../oa/graph/indicators/oozie_app/indicators.sh   | 15 ++++++++++-----
 .../graph/indicators/oozie_app/scripts/Step1.sql  | 13 ++++++-------
 .../oa/graph/indicators/oozie_app/workflow.xml    |  4 ++--
 .../graph/usagestatsbuild/export/ConnectDB.java   | 13 +++++++------
 5 files changed, 26 insertions(+), 21 deletions(-)

diff --git a/dhp-workflows/dhp-indicators/src/main/resources/eu/dnetlib/dhp/oa/graph/indicators/oozie_app/config-default.xml b/dhp-workflows/dhp-indicators/src/main/resources/eu/dnetlib/dhp/oa/graph/indicators/oozie_app/config-default.xml
index 6d255a7f4..f3ce0faaf 100755
--- a/dhp-workflows/dhp-indicators/src/main/resources/eu/dnetlib/dhp/oa/graph/indicators/oozie_app/config-default.xml
+++ b/dhp-workflows/dhp-indicators/src/main/resources/eu/dnetlib/dhp/oa/graph/indicators/oozie_app/config-default.xml
@@ -21,7 +21,7 @@
         hive_jdbc_url
-        jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000
+        jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000/;UseNativeQuery=1;?spark.executor.memory=19166291558;spark.yarn.executor.memoryOverhead=3225;spark.driver.memory=11596411699;spark.yarn.driver.memoryOverhead=1228
         oozie.wf.workflow.notification.url
diff --git a/dhp-workflows/dhp-indicators/src/main/resources/eu/dnetlib/dhp/oa/graph/indicators/oozie_app/indicators.sh b/dhp-workflows/dhp-indicators/src/main/resources/eu/dnetlib/dhp/oa/graph/indicators/oozie_app/indicators.sh
index 671346438..91fc1e78b 100755
--- a/dhp-workflows/dhp-indicators/src/main/resources/eu/dnetlib/dhp/oa/graph/indicators/oozie_app/indicators.sh
+++ b/dhp-workflows/dhp-indicators/src/main/resources/eu/dnetlib/dhp/oa/graph/indicators/oozie_app/indicators.sh
@@ -6,19 +6,24 @@ then
 	ln -sfn ${PYTHON_EGG_CACHE}${link_folder} ${link_folder}
 fi
 
-export TARGET=$1
-export SCRIPT_PATH=$2
+export SOURCE=$1
+export TARGET=$2
+export SCRIPT_PATH=$3
 
 export HIVE_OPTS="-hiveconf mapred.job.queue.name=analytics -hiveconf hive.spark.client.connect.timeout=120000ms -hiveconf hive.spark.client.server.connect.timeout=300000ms -hiveconf spark.executor.memory=4831838208 -hiveconf spark.yarn.executor.memoryOverhead=450"
-#export HADOOP_USER="dimitris.pierrakos"
+export HADOOP_USER_NAME="oozie"
 
 echo "Getting file from " $SCRIPT_PATH
 hdfs dfs -copyToLocal $SCRIPT_PATH
 
 #hive -e "create database if not exists ${TARGET}"
 echo "Creating indicators"
+hive -e "create database if not exists ${TARGET}"
 #hive $HIVE_OPTS --database ${TARGET} -e "show tables" | grep -v WARN | sed "s/^\(.*\)/analyze table ${TARGET}.\1 compute statistics;/" > foo
 #hive $HIVE_OPTS -f foo
-#hive $HIVE_OPTS --database ${TARGET} -f Step1.sql
+#hive $HIVE_OPTS --database ${SOURCE} -f Step1.sql
 #cat Step1.sql > foo
-hive -f Step1.sql
+#cat Step1.sql | sed s/TARGET/$TARGET/g | sed s/SOURCE/$SOURCE/g1 > foo
+#cat Step1.sql | sed "s/TARGET/openaire_beta_indicators_20221216/g | sed s/SOURCE/openaire_beta_stats_20220930_dimhive10/g1" > foo
+cat Step1.sql | sed "s/TARGET/${TARGET}/g" | sed "s/SOURCE/${SOURCE}/g" > foo
+hive -f foo
 echo "Indicators created"
diff --git a/dhp-workflows/dhp-indicators/src/main/resources/eu/dnetlib/dhp/oa/graph/indicators/oozie_app/scripts/Step1.sql b/dhp-workflows/dhp-indicators/src/main/resources/eu/dnetlib/dhp/oa/graph/indicators/oozie_app/scripts/Step1.sql
index 9c71da3d5..b8fb77a02 100755
--- a/dhp-workflows/dhp-indicators/src/main/resources/eu/dnetlib/dhp/oa/graph/indicators/oozie_app/scripts/Step1.sql
+++ b/dhp-workflows/dhp-indicators/src/main/resources/eu/dnetlib/dhp/oa/graph/indicators/oozie_app/scripts/Step1.sql
@@ -1,14 +1,13 @@
-select * from openaire_beta_stats_20220930_dimhive10.datasource_oids
+select * from SOURCE.datasource_oids
 limit 10;
 
-CREATE database openaire_beta_indicators_20220930;
+--CREATE database TARGET;
 
-
-create table openaire_beta_indicators_20220930.tmp_result stored as parquet as
-select * from openaire_beta_stats_20220930_dimhive10.result
+create table TARGET.tmp_result stored as parquet as
+select * from SOURCE.result
 limit 10;
 
-create table openaire_beta_indicators_20220930.tmp_datasource stored as parquet as
-select * from openaire_beta_stats_20220930_dimhive10.datasource
+create table TARGET.tmp_datasource stored as parquet as
+select * from SOURCE.datasource
 limit 10;
diff --git a/dhp-workflows/dhp-indicators/src/main/resources/eu/dnetlib/dhp/oa/graph/indicators/oozie_app/workflow.xml b/dhp-workflows/dhp-indicators/src/main/resources/eu/dnetlib/dhp/oa/graph/indicators/oozie_app/workflow.xml
index e51306df7..2efc3877d 100755
--- a/dhp-workflows/dhp-indicators/src/main/resources/eu/dnetlib/dhp/oa/graph/indicators/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-indicators/src/main/resources/eu/dnetlib/dhp/oa/graph/indicators/oozie_app/workflow.xml
@@ -54,9 +54,9 @@
             ${jobTracker}
             ${nameNode}
             indicators.sh
-            stats_db_name=${stats_db_name}
+            ${stats_db_name}
+            ${indicators_db_name}
             ${wf:appPath()}/scripts/Step1.sql
-            HADOOP_USER_NAME=${wf:user()}
             indicators.sh
diff --git a/dhp-workflows/dhp-usage-stats-build/src/main/java/eu/dnetlib/oa/graph/usagestatsbuild/export/ConnectDB.java b/dhp-workflows/dhp-usage-stats-build/src/main/java/eu/dnetlib/oa/graph/usagestatsbuild/export/ConnectDB.java
index e5c89684c..be7ce8afa 100755
--- a/dhp-workflows/dhp-usage-stats-build/src/main/java/eu/dnetlib/oa/graph/usagestatsbuild/export/ConnectDB.java
+++ b/dhp-workflows/dhp-usage-stats-build/src/main/java/eu/dnetlib/oa/graph/usagestatsbuild/export/ConnectDB.java
@@ -70,13 +70,14 @@ public abstract class ConnectDB {
 	}
 
 	public static String getUsageStatsDBSchema() {
-		String datePattern = "YYYYMMdd";
-		DateFormat df = new SimpleDateFormat(datePattern);
-//		Get the today date using Calendar object.
-		Date today = Calendar.getInstance().getTime();
-		String todayAsString = df.format(today);
+//		String datePattern = "YYYYMMdd";
+//		DateFormat df = new SimpleDateFormat(datePattern);
+////		Get the today date using Calendar object.
+//		Date today = Calendar.getInstance().getTime();
+//		String todayAsString = df.format(today);
 
-		return ConnectDB.usageStatsDBSchema + todayAsString;
+//		return ConnectDB.usageStatsDBSchema + todayAsString;
+		return ConnectDB.usageStatsDBSchema;
 	}
 
 	public static String getStatsDBSchema() {