forked from D-Net/dnet-hadoop

Changes 05012023

parent 0654343479
commit 7a7eb30a3f
@@ -21,7 +21,7 @@
     </property>
     <property>
         <name>hive_jdbc_url</name>
-        <value>jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000</value>
+        <value>jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000/;UseNativeQuery=1;?spark.executor.memory=19166291558;spark.yarn.executor.memoryOverhead=3225;spark.driver.memory=11596411699;spark.yarn.driver.memoryOverhead=1228</value>
     </property>
     <property>
         <name>oozie.wf.workflow.notification.url</name>
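The new hive_jdbc_url packs the Spark session settings directly into the connection string. A quick way to sanity-check the URL from a gateway host is to open it with beeline; this is just a sketch, not part of the commit, and assumes beeline is on the PATH:

    # hypothetical smoke test of the new connection string (not in this commit)
    beeline -u "jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000/;UseNativeQuery=1;?spark.executor.memory=19166291558;spark.yarn.executor.memoryOverhead=3225;spark.driver.memory=11596411699;spark.yarn.driver.memoryOverhead=1228" -e "show databases;"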
@@ -6,19 +6,24 @@ then
 ln -sfn ${PYTHON_EGG_CACHE}${link_folder} ${link_folder}
 fi
 
-export TARGET=$1
-export SCRIPT_PATH=$2
+export SOURCE=$1
+export TARGET=$2
+export SCRIPT_PATH=$3
 export HIVE_OPTS="-hiveconf mapred.job.queue.name=analytics -hiveconf hive.spark.client.connect.timeout=120000ms -hiveconf hive.spark.client.server.connect.timeout=300000ms -hiveconf spark.executor.memory=4831838208 -hiveconf spark.yarn.executor.memoryOverhead=450"
-#export HADOOP_USER="dimitris.pierrakos"
+export HADOOP_USER_NAME="oozie"
 
 echo "Getting file from " $SCRIPT_PATH
 hdfs dfs -copyToLocal $SCRIPT_PATH
 #hive -e "create database if not exists ${TARGET}"
 
 echo "Creating indicators"
+hive -e "create database if not exists ${TARGET}"
 #hive $HIVE_OPTS --database ${TARGET} -e "show tables" | grep -v WARN | sed "s/^\(.*\)/analyze table ${TARGET}.\1 compute statistics;/" > foo
 #hive $HIVE_OPTS -f foo
-#hive $HIVE_OPTS --database ${TARGET} -f Step1.sql
+#hive $HIVE_OPTS --database ${SOURCE} -f Step1.sql
 #cat Step1.sql > foo
-hive -f Step1.sql
+#cat Step1.sql | sed s/TARGET/$TARGET/g | sed s/SOURCE/$SOURCE/g1 > foo
+#cat Step1.sql | sed "s/TARGET/openaire_beta_indicators_20221216/g | sed s/SOURCE/openaire_beta_stats_20220930_dimhive10/g1" > foo
+cat Step1.sql | sed "s/TARGET/${TARGET}/g" | sed "s/SOURCE/${SOURCE}/g" > foo
+hive -f foo
 echo "Indicators created"
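indicators.sh now expects three positional arguments instead of two: the source stats database ($1), the target indicators database ($2), and the HDFS path of the SQL script ($3). A hypothetical manual invocation, reusing the database names that used to be hard-coded, just to illustrate the argument order:

    # hypothetical invocation; db names taken from the previously hard-coded values,
    # the script path is a placeholder
    bash indicators.sh openaire_beta_stats_20220930_dimhive10 openaire_beta_indicators_20220930 hdfs://path/to/scripts/Step1.sql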
@@ -1,14 +1,13 @@
-select * from openaire_beta_stats_20220930_dimhive10.datasource_oids
+select * from SOURCE.datasource_oids
 limit 10;
 
-CREATE database openaire_beta_indicators_20220930;
-
+--CREATE database TARGET;
 
-create table openaire_beta_indicators_20220930.tmp_result stored as parquet as
-select * from openaire_beta_stats_20220930_dimhive10.result
+create table TARGET.tmp_result stored as parquet as
+select * from SOURCE.result
 limit 10;
 
-create table openaire_beta_indicators_20220930.tmp_datasource stored as parquet as
-select * from openaire_beta_stats_20220930_dimhive10.datasource
+create table TARGET.tmp_datasource stored as parquet as
+select * from SOURCE.datasource
 limit 10;
 
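With Step1.sql rewritten to use the TARGET and SOURCE placeholders, the sed pipeline in indicators.sh substitutes the concrete database names back in at run time. A sketch of the effect, using the names the old file had hard-coded:

    # illustration only; these values come from the previously hard-coded SQL
    export SOURCE=openaire_beta_stats_20220930_dimhive10
    export TARGET=openaire_beta_indicators_20220930
    cat Step1.sql | sed "s/TARGET/${TARGET}/g" | sed "s/SOURCE/${SOURCE}/g" | head -n 1
    # prints: select * from openaire_beta_stats_20220930_dimhive10.datasource_oids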
@@ -54,9 +54,9 @@
             <job-tracker>${jobTracker}</job-tracker>
             <name-node>${nameNode}</name-node>
             <exec>indicators.sh</exec>
-            <argument>stats_db_name=${stats_db_name}</argument>
+            <argument>${stats_db_name}</argument>
+            <argument>${indicators_db_name}</argument>
             <argument>${wf:appPath()}/scripts/Step1.sql</argument>
-            <env-var>HADOOP_USER_NAME=${wf:user()}</env-var>
             <file>indicators.sh</file>
         </shell>
         <ok to="End"/>
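In an Oozie shell action, each <argument> element is handed to the executable as a positional parameter, in document order, which is what the new three-argument signature of indicators.sh relies on:

    # how the <argument> elements above land in indicators.sh (see the script diff earlier)
    export SOURCE=$1       # <argument>${stats_db_name}</argument>
    export TARGET=$2       # <argument>${indicators_db_name}</argument>
    export SCRIPT_PATH=$3  # <argument>${wf:appPath()}/scripts/Step1.sql</argument>

The removed <env-var>HADOOP_USER_NAME=${wf:user()}</env-var> is compensated by the hard-coded export HADOOP_USER_NAME="oozie" inside the script itself.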
@@ -70,13 +70,14 @@ public abstract class ConnectDB {
 	}
 
 	public static String getUsageStatsDBSchema() {
-		String datePattern = "YYYYMMdd";
-		DateFormat df = new SimpleDateFormat(datePattern);
-		// Get the today date using Calendar object.
-		Date today = Calendar.getInstance().getTime();
-		String todayAsString = df.format(today);
+//		String datePattern = "YYYYMMdd";
+//		DateFormat df = new SimpleDateFormat(datePattern);
+//		// Get the today date using Calendar object.
+//		Date today = Calendar.getInstance().getTime();
+//		String todayAsString = df.format(today);
 
-		return ConnectDB.usageStatsDBSchema + todayAsString;
+//		return ConnectDB.usageStatsDBSchema + todayAsString;
+		return ConnectDB.usageStatsDBSchema;
 	}
 
 	public static String getStatsDBSchema() {