Changes 05012023

This commit is contained in:
dimitrispie 2023-01-05 10:37:33 +02:00
parent 0654343479
commit 7a7eb30a3f
5 changed files with 26 additions and 21 deletions

View File

@ -21,7 +21,7 @@
</property>
<property>
    <!--
      HiveServer2 JDBC endpoint used by this workflow.
      Per the Hive JDBC URL layout, the part after "?" is a ";"-separated list of
      Hive configuration overrides; here they are the Spark executor/driver memory settings.
      NOTE(review): UseNativeQuery=1 looks like a driver-specific session option, and the
      memory figures are presumably bytes (memory) and MB (memoryOverhead). Confirm both
      against the driver and cluster in use.
    -->
    <name>hive_jdbc_url</name>
    <value>jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000/;UseNativeQuery=1;?spark.executor.memory=19166291558;spark.yarn.executor.memoryOverhead=3225;spark.driver.memory=11596411699;spark.yarn.driver.memoryOverhead=1228</value>
</property>
<property>
<name>oozie.wf.workflow.notification.url</name>

View File

@ -6,19 +6,24 @@ then
ln -sfn ${PYTHON_EGG_CACHE}${link_folder} ${link_folder}
fi
# Build the indicators database from an existing stats database.
#
# Positional arguments:
#   $1  SOURCE       database the SQL template reads from
#   $2  TARGET       database the SQL template writes to
#   $3  SCRIPT_PATH  HDFS location of the SQL template
#
# The template is copied into the working directory, every occurrence of the
# literal words TARGET and SOURCE in it is replaced with the database names,
# and the rendered file is executed with hive. Any failing step aborts the
# script with a non-zero status so the caller can see the failure.
if [ "$#" -lt 3 ] || [ -z "$1" ] || [ -z "$2" ] || [ -z "$3" ]; then
  echo "Usage: $0 <source_db> <target_db> <hdfs_script_path>" >&2
  exit 1
fi

export SOURCE="$1"
export TARGET="$2"
export SCRIPT_PATH="$3"
export HIVE_OPTS="-hiveconf mapred.job.queue.name=analytics -hiveconf hive.spark.client.connect.timeout=120000ms -hiveconf hive.spark.client.server.connect.timeout=300000ms -hiveconf spark.executor.memory=4831838208 -hiveconf spark.yarn.executor.memoryOverhead=450"
# NOTE(review): the workflow's shell action also passes HADOOP_USER_NAME through
# <env-var>; this export replaces that value inside the script. Confirm "oozie"
# is the intended user.
export HADOOP_USER_NAME="oozie"

# copyToLocal without a destination keeps the file's own name in the working directory.
SCRIPT_NAME=$(basename "$SCRIPT_PATH")
# Rendered copy of the template with the placeholders filled in.
RENDERED_SCRIPT="foo"

echo "Getting file from " "$SCRIPT_PATH"
hdfs dfs -copyToLocal "$SCRIPT_PATH" || exit 1

echo "Creating indicators"
hive -e "create database if not exists ${TARGET}" || exit 1

# Fill in TARGET first, then SOURCE (same order as the two chained sed calls it replaces).
# Only the rendered file is executed; the raw template still holds the placeholders.
sed -e "s/TARGET/${TARGET}/g" -e "s/SOURCE/${SOURCE}/g" "$SCRIPT_NAME" > "$RENDERED_SCRIPT" || exit 1
hive -f "$RENDERED_SCRIPT" || exit 1

echo "Indicators created"

View File

@ -1,14 +1,13 @@
-- Template executed by indicators.sh.
-- The two upper-case database placeholders used below are replaced by sed with the
-- real database names before hive runs this file, so do not hard-code names here.
-- The target database itself is created by the calling script beforehand.

-- Quick read check against the source database.
select * from SOURCE.datasource_oids
limit 10;

-- Each tmp_ table is a Parquet copy of at most 10 rows of its source table.
create table TARGET.tmp_result stored as parquet as
select * from SOURCE.result
limit 10;

create table TARGET.tmp_datasource stored as parquet as
select * from SOURCE.datasource
limit 10;

View File

@ -54,9 +54,9 @@
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<exec>indicators.sh</exec>
<argument>stats_db_name=${stats_db_name}</argument>
<argument>${stats_db_name}</argument>
<argument>${indicators_db_name}</argument>
<argument>${wf:appPath()}/scripts/Step1.sql</argument>
<env-var>HADOOP_USER_NAME=${wf:user()}</env-var>
<file>indicators.sh</file>
</shell>
<ok to="End"/>

View File

@ -70,13 +70,14 @@ public abstract class ConnectDB {
}
/**
 * Returns the name of the usage-stats database schema.
 *
 * <p>The configured schema name is returned unchanged; no date suffix is appended.
 *
 * <p>NOTE(review): the earlier date-suffix variant formatted today's date with
 * {@code "YYYYMMdd"}. In {@code SimpleDateFormat}, {@code YYYY} is the week year,
 * which differs from the calendar year around New Year. If a dated schema name is
 * ever reinstated, use {@code "yyyyMMdd"}.
 *
 * @return the value of {@code ConnectDB.usageStatsDBSchema}
 */
public static String getUsageStatsDBSchema() {
    return ConnectDB.usageStatsDBSchema;
}
public static String getStatsDBSchema() {