Changes 05012023

This commit is contained in:
dimitrispie 2023-01-05 10:37:33 +02:00
parent 0654343479
commit 7a7eb30a3f
5 changed files with 26 additions and 21 deletions

View File

@ -21,7 +21,7 @@
</property> </property>
<property> <property>
<name>hive_jdbc_url</name> <name>hive_jdbc_url</name>
<value>jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000</value> <value>jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000/;UseNativeQuery=1;?spark.executor.memory=19166291558;spark.yarn.executor.memoryOverhead=3225;spark.driver.memory=11596411699;spark.yarn.driver.memoryOverhead=1228</value>
</property> </property>
<property> <property>
<name>oozie.wf.workflow.notification.url</name> <name>oozie.wf.workflow.notification.url</name>

View File

@ -6,19 +6,24 @@ then
ln -sfn ${PYTHON_EGG_CACHE}${link_folder} ${link_folder} ln -sfn ${PYTHON_EGG_CACHE}${link_folder} ${link_folder}
fi fi
export TARGET=$1 export SOURCE=$1
export SCRIPT_PATH=$2 export TARGET=$2
export SCRIPT_PATH=$3
export HIVE_OPTS="-hiveconf mapred.job.queue.name=analytics -hiveconf hive.spark.client.connect.timeout=120000ms -hiveconf hive.spark.client.server.connect.timeout=300000ms -hiveconf spark.executor.memory=4831838208 -hiveconf spark.yarn.executor.memoryOverhead=450" export HIVE_OPTS="-hiveconf mapred.job.queue.name=analytics -hiveconf hive.spark.client.connect.timeout=120000ms -hiveconf hive.spark.client.server.connect.timeout=300000ms -hiveconf spark.executor.memory=4831838208 -hiveconf spark.yarn.executor.memoryOverhead=450"
#export HADOOP_USER="dimitris.pierrakos" export HADOOP_USER_NAME="oozie"
echo "Getting file from " $SCRIPT_PATH echo "Getting file from " $SCRIPT_PATH
hdfs dfs -copyToLocal $SCRIPT_PATH hdfs dfs -copyToLocal $SCRIPT_PATH
#hive -e "create database if not exists ${TARGET}" #hive -e "create database if not exists ${TARGET}"
echo "Creating indicators" echo "Creating indicators"
hive -e "create database if not exists ${TARGET}"
#hive $HIVE_OPTS --database ${TARGET} -e "show tables" | grep -v WARN | sed "s/^\(.*\)/analyze table ${TARGET}.\1 compute statistics;/" > foo #hive $HIVE_OPTS --database ${TARGET} -e "show tables" | grep -v WARN | sed "s/^\(.*\)/analyze table ${TARGET}.\1 compute statistics;/" > foo
#hive $HIVE_OPTS -f foo #hive $HIVE_OPTS -f foo
#hive $HIVE_OPTS --database ${TARGET} -f Step1.sql #hive $HIVE_OPTS --database ${SOURCE} -f Step1.sql
#cat Step1.sql > foo #cat Step1.sql > foo
hive -f Step1.sql #cat Step1.sql | sed s/TARGET/$TARGET/g | sed s/SOURCE/$SOURCE/g1 > foo
#cat Step1.sql | sed "s/TARGET/openaire_beta_indicators_20221216/g | sed s/SOURCE/openaire_beta_stats_20220930_dimhive10/g1" > foo
cat Step1.sql | sed "s/TARGET/${TARGET}/g" | sed "s/SOURCE/${SOURCE}/g" > foo
hive -f foo
echo "Indicators created" echo "Indicators created"

View File

@ -1,14 +1,13 @@
select * from openaire_beta_stats_20220930_dimhive10.datasource_oids select * from SOURCE.datasource_oids
limit 10; limit 10;
CREATE database openaire_beta_indicators_20220930; --CREATE database TARGET;
create table TARGET.tmp_result stored as parquet as
create table openaire_beta_indicators_20220930.tmp_result stored as parquet as select * from SOURCE.result
select * from openaire_beta_stats_20220930_dimhive10.result
limit 10; limit 10;
create table openaire_beta_indicators_20220930.tmp_datasource stored as parquet as create table TARGET.tmp_datasource stored as parquet as
select * from openaire_beta_stats_20220930_dimhive10.datasource select * from SOURCE.datasource
limit 10; limit 10;

View File

@ -54,9 +54,9 @@
<job-tracker>${jobTracker}</job-tracker> <job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node> <name-node>${nameNode}</name-node>
<exec>indicators.sh</exec> <exec>indicators.sh</exec>
<argument>stats_db_name=${stats_db_name}</argument> <argument>${stats_db_name}</argument>
<argument>${indicators_db_name}</argument>
<argument>${wf:appPath()}/scripts/Step1.sql</argument> <argument>${wf:appPath()}/scripts/Step1.sql</argument>
<env-var>HADOOP_USER_NAME=${wf:user()}</env-var>
<file>indicators.sh</file> <file>indicators.sh</file>
</shell> </shell>
<ok to="End"/> <ok to="End"/>

View File

@ -70,13 +70,14 @@ public abstract class ConnectDB {
} }
public static String getUsageStatsDBSchema() { public static String getUsageStatsDBSchema() {
String datePattern = "YYYYMMdd"; // String datePattern = "YYYYMMdd";
DateFormat df = new SimpleDateFormat(datePattern); // DateFormat df = new SimpleDateFormat(datePattern);
// Get the today date using Calendar object. //// Get the today date using Calendar object.
Date today = Calendar.getInstance().getTime(); // Date today = Calendar.getInstance().getTime();
String todayAsString = df.format(today); // String todayAsString = df.format(today);
return ConnectDB.usageStatsDBSchema + todayAsString; // return ConnectDB.usageStatsDBSchema + todayAsString;
return ConnectDB.usageStatsDBSchema;
} }
public static String getStatsDBSchema() { public static String getStatsDBSchema() {