forked from D-Net/dnet-hadoop
Changes 05012023
This commit is contained in:
parent
0654343479
commit
7a7eb30a3f
|
@ -21,7 +21,7 @@
|
|||
</property>
|
||||
<property>
|
||||
<name>hive_jdbc_url</name>
|
||||
<value>jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000</value>
|
||||
<value>jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000/;UseNativeQuery=1;?spark.executor.memory=19166291558;spark.yarn.executor.memoryOverhead=3225;spark.driver.memory=11596411699;spark.yarn.driver.memoryOverhead=1228</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozie.wf.workflow.notification.url</name>
|
||||
|
|
|
@ -6,19 +6,24 @@ then
|
|||
ln -sfn ${PYTHON_EGG_CACHE}${link_folder} ${link_folder}
|
||||
fi
|
||||
|
||||
export TARGET=$1
|
||||
export SCRIPT_PATH=$2
|
||||
export SOURCE=$1
|
||||
export TARGET=$2
|
||||
export SCRIPT_PATH=$3
|
||||
export HIVE_OPTS="-hiveconf mapred.job.queue.name=analytics -hiveconf hive.spark.client.connect.timeout=120000ms -hiveconf hive.spark.client.server.connect.timeout=300000ms -hiveconf spark.executor.memory=4831838208 -hiveconf spark.yarn.executor.memoryOverhead=450"
|
||||
#export HADOOP_USER="dimitris.pierrakos"
|
||||
export HADOOP_USER_NAME="oozie"
|
||||
|
||||
echo "Getting file from " $SCRIPT_PATH
|
||||
hdfs dfs -copyToLocal $SCRIPT_PATH
|
||||
#hive -e "create database if not exists ${TARGET}"
|
||||
|
||||
echo "Creating indicators"
|
||||
hive -e "create database if not exists ${TARGET}"
|
||||
#hive $HIVE_OPTS --database ${TARGET} -e "show tables" | grep -v WARN | sed "s/^\(.*\)/analyze table ${TARGET}.\1 compute statistics;/" > foo
|
||||
#hive $HIVE_OPTS -f foo
|
||||
#hive $HIVE_OPTS --database ${TARGET} -f Step1.sql
|
||||
#hive $HIVE_OPTS --database ${SOURCE} -f Step1.sql
|
||||
#cat Step1.sql > foo
|
||||
hive -f Step1.sql
|
||||
#cat Step1.sql | sed s/TARGET/$TARGET/g | sed s/SOURCE/$SOURCE/g1 > foo
|
||||
#cat Step1.sql | sed "s/TARGET/openaire_beta_indicators_20221216/g | sed s/SOURCE/openaire_beta_stats_20220930_dimhive10/g1" > foo
|
||||
cat Step1.sql | sed "s/TARGET/${TARGET}/g" | sed "s/SOURCE/${SOURCE}/g" > foo
|
||||
hive -f foo
|
||||
echo "Indicators created"
|
||||
|
|
|
@ -1,14 +1,13 @@
|
|||
select * from openaire_beta_stats_20220930_dimhive10.datasource_oids
|
||||
select * from SOURCE.datasource_oids
|
||||
limit 10;
|
||||
|
||||
CREATE database openaire_beta_indicators_20220930;
|
||||
--CREATE database TARGET;
|
||||
|
||||
|
||||
create table openaire_beta_indicators_20220930.tmp_result stored as parquet as
|
||||
select * from openaire_beta_stats_20220930_dimhive10.result
|
||||
create table TARGET.tmp_result stored as parquet as
|
||||
select * from SOURCE.result
|
||||
limit 10;
|
||||
|
||||
create table openaire_beta_indicators_20220930.tmp_datasource stored as parquet as
|
||||
select * from openaire_beta_stats_20220930_dimhive10.datasource
|
||||
create table TARGET.tmp_datasource stored as parquet as
|
||||
select * from SOURCE.datasource
|
||||
limit 10;
|
||||
|
||||
|
|
|
@ -54,9 +54,9 @@
|
|||
<job-tracker>${jobTracker}</job-tracker>
|
||||
<name-node>${nameNode}</name-node>
|
||||
<exec>indicators.sh</exec>
|
||||
<argument>stats_db_name=${stats_db_name}</argument>
|
||||
<argument>${stats_db_name}</argument>
|
||||
<argument>${indicators_db_name}</argument>
|
||||
<argument>${wf:appPath()}/scripts/Step1.sql</argument>
|
||||
<env-var>HADOOP_USER_NAME=${wf:user()}</env-var>
|
||||
<file>indicators.sh</file>
|
||||
</shell>
|
||||
<ok to="End"/>
|
||||
|
|
|
@ -70,13 +70,14 @@ public abstract class ConnectDB {
|
|||
}
|
||||
|
||||
public static String getUsageStatsDBSchema() {
|
||||
String datePattern = "YYYYMMdd";
|
||||
DateFormat df = new SimpleDateFormat(datePattern);
|
||||
// Get the today date using Calendar object.
|
||||
Date today = Calendar.getInstance().getTime();
|
||||
String todayAsString = df.format(today);
|
||||
// String datePattern = "YYYYMMdd";
|
||||
// DateFormat df = new SimpleDateFormat(datePattern);
|
||||
//// Get the today date using Calendar object.
|
||||
// Date today = Calendar.getInstance().getTime();
|
||||
// String todayAsString = df.format(today);
|
||||
|
||||
return ConnectDB.usageStatsDBSchema + todayAsString;
|
||||
// return ConnectDB.usageStatsDBSchema + todayAsString;
|
||||
return ConnectDB.usageStatsDBSchema;
|
||||
}
|
||||
|
||||
public static String getStatsDBSchema() {
|
||||
|
|
Loading…
Reference in New Issue