Creation of Sarc JSON tables

Spyros Zoupanos 2020-09-16 21:46:32 +03:00
parent 1dcb197f02
commit 958fb1a343
2 changed files with 105 additions and 5 deletions


@@ -8,6 +8,7 @@ import java.net.URL;
import java.net.URLConnection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.text.SimpleDateFormat;
import java.util.Calendar;
@@ -65,10 +66,108 @@ public class SarcStats {
}
}
public void processSarc() {
public void processSarc() throws Exception {
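// Registers the HCatalog JSON SerDe, then (re)creates the two external
// tables over the downloaded SARC report files (array and non-array form).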
Statement stmt = ConnectDB.getConnection().createStatement();
ConnectDB.getConnection().setAutoCommit(false);
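// The SerDe jar must be registered in the Hive session before the CREATE
// TABLE statements below can reference it; the jar path is specific to
// this CDH 5.14 installation.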
System.out.println("====> Adding JSON Serde jar");
stmt.executeUpdate("add jar /usr/share/cmf/common_jars/hive-hcatalog-core-1.1.0-cdh5.14.0.jar");
System.out.println("====> Added JSON Serde jar");
System.out.println("====> Dropping sarc_sushilogtmp_json_array table");
String drop_sarc_sushilogtmp_json_array = "DROP TABLE IF EXISTS " +
ConnectDB.getUsageStatsDBSchema() +
".sarc_sushilogtmp_json_array";
stmt.executeUpdate(drop_sarc_sushilogtmp_json_array);
System.out.println("====> Dropped sarc_sushilogtmp_json_array table");
System.out.println("====> Creating sarc_sushilogtmp_json_array table");
String create_sarc_sushilogtmp_json_array = "CREATE EXTERNAL TABLE IF NOT EXISTS " +
ConnectDB.getUsageStatsDBSchema() + ".sarc_sushilogtmp_json_array(\n" +
" `ItemIdentifier` ARRAY<\n" +
" struct<\n" +
" `Type`: STRING,\n" +
" `Value`: STRING\n" +
" >\n" +
" >,\n" +
" `ItemPerformance` struct<\n" +
" `Period`: struct<\n" +
" `Begin`: STRING,\n" +
" `End`: STRING\n" +
" >,\n" +
" `Instance`: struct<\n" +
" `Count`: STRING,\n" +
" `MetricType`: STRING\n" +
" >\n" +
" >\n" +
")" +
"ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe'\n" +
"LOCATION '" + UsageStatsExporter.sarcsReportPathArray + "'\n" +
"TBLPROPERTIES (\"transactional\"=\"false\")";
stmt.executeUpdate(create_sarc_sushilogtmp_json_array);
System.out.println("====> Created sarc_sushilogtmp_json_array table");
System.out.println("====> Dropping sarc_sushilogtmp_json_non_array table");
String drop_sarc_sushilogtmp_json_non_array = "DROP TABLE IF EXISTS " +
ConnectDB.getUsageStatsDBSchema() +
".sarc_sushilogtmp_json_non_array";
stmt.executeUpdate(drop_sarc_sushilogtmp_json_non_array);
System.out.println("====> Dropped sarc_sushilogtmp_json_non_array table");
System.out.println("====> Creating sarc_sushilogtmp_json_non_array table");
String create_sarc_sushilogtmp_json_non_array = "CREATE EXTERNAL TABLE IF NOT EXISTS " +
ConnectDB.getUsageStatsDBSchema() + ".sarc_sushilogtmp_json_non_array(\n" +
" `ItemIdentifier` struct<\n" +
" `Type`: STRING,\n" +
" `Value`: STRING\n" +
" >,\n" +
" `ItemPerformance` struct<\n" +
" `Period`: struct<\n" +
" `Begin`: STRING,\n" +
" `End`: STRING\n" +
" >,\n" +
" `Instance`: struct<\n" +
" `Count`: STRING,\n" +
" `MetricType`: STRING\n" +
" >\n" +
" >" +
")" +
"ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe'\n" +
"LOCATION '" + UsageStatsExporter.sarcsReportPathNonArray + "'\n" +
"TBLPROPERTIES (\"transactional\"=\"false\")";
stmt.executeUpdate(create_sarc_sushilogtmp_json_non_array);
System.out.println("====> Created sarc_sushilogtmp_json_non_array table");
// System.out.println("====> Dropping sushilogtmp table");
// String drop_sushilogtmp = "DROP TABLE IF EXISTS " +
// ConnectDB.getUsageStatsDBSchema() +
// ".sushilogtmp";
// stmt.executeUpdate(drop_sushilogtmp);
// System.out.println("====> Dropped sushilogtmp table");
//
// System.out.println("====> Creating sushilogtmp table");
// String create_sushilogtmp = "CREATE TABLE " + ConnectDB.getUsageStatsDBSchema()
// + ".sushilogtmp(source STRING, repository STRING, " +
// "rid STRING, date STRING, metric_type STRING, count INT) clustered by (source) into 100 buckets stored as orc "
// +
// "tblproperties('transactional'='true')";
// stmt.executeUpdate(create_sushilogtmp);
// System.out.println("====> Created sushilogtmp table");
//
// System.out.println("====> Inserting to sushilogtmp table");
// String insert_sushilogtmp = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".sushilogtmp " +
// "SELECT 'IRUS-UK', 'opendoar____::', `ItemIdent`.`Value`, `ItemPerf`.`Period`.`Begin`, " +
// "`ItemPerf`.`Instance`.`MetricType`, `ItemPerf`.`Instance`.`Count` " +
// "FROM " + ConnectDB.getUsageStatsDBSchema() + ".sushilogtmp_json " +
// "LATERAL VIEW posexplode(ItemIdentifier) ItemIdentifierTable AS seqi, ItemIdent " +
// "LATERAL VIEW posexplode(ItemPerformance) ItemPerformanceTable AS seqp, ItemPerf " +
// "WHERE `ItemIdent`.`Type`= 'OAI'";
// stmt.executeUpdate(insert_sushilogtmp);
// System.out.println("====> Inserted to sushilogtmp table");
ConnectDB.getConnection().close();
}
public void getSarc(String sarcsReportPathArray, String sarcsReportPathNonArray) throws Exception {
// There was a problem downloading the following file
getARReport(


@@ -60,8 +60,9 @@ public class UsageStatsExporter {
// log.info("irus done");
SarcStats sarcStats = new SarcStats();
sarcStats.getSarc(sarcsReportPathArray, sarcsReportPathNonArray);
sarcStats.sarcStats();
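// The download (getSarc) and aggregation (sarcStats) steps are disabled
// for now; processSarc() only (re)creates the JSON tables.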
// sarcStats.getSarc(sarcsReportPathArray, sarcsReportPathNonArray);
sarcStats.processSarc();
// sarcStats.sarcStats();
log.info("sarc done");
// // finalize usagestats