More progress on processlaReferenciaLog

This commit is contained in:
Spyros Zoupanos 2020-09-20 15:07:09 +03:00
parent a2d64b4644
commit 3c11acde0c
2 changed files with 40 additions and 8 deletions

View File

@ -127,14 +127,14 @@ public class LaReferenciaStats {
ConnectDB.getConnection().setAutoCommit(false);
System.out.println("====> Dropping lareferencialogtmp_json table");
String drop_piwiklogtmp_json = "DROP TABLE IF EXISTS " +
String drop_lareferencialogtmp_json = "DROP TABLE IF EXISTS " +
ConnectDB.getUsageStatsDBSchema() +
".lareferencialogtmp_json";
stmt.executeUpdate(drop_piwiklogtmp_json);
stmt.executeUpdate(drop_lareferencialogtmp_json);
System.out.println("====> Dropped lareferencialogtmp_json table");
System.out.println("====> Creating lareferencialogtmp_json");
String create_piwiklogtmp_json = "CREATE EXTERNAL TABLE IF NOT EXISTS " +
String create_lareferencialogtmp_json = "CREATE EXTERNAL TABLE IF NOT EXISTS " +
ConnectDB.getUsageStatsDBSchema() +
".lareferencialogtmp_json(\n" +
" `idSite` STRING,\n" +
@ -162,8 +162,40 @@ public class LaReferenciaStats {
"ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe'\n" +
"LOCATION '" + UsageStatsExporter.lareferenciaLogPath + "'\n" +
"TBLPROPERTIES (\"transactional\"=\"false\")";
stmt.executeUpdate(create_piwiklogtmp_json);
stmt.executeUpdate(create_lareferencialogtmp_json);
System.out.println("====> Created lareferencialogtmp_json");
System.out.println("====> Dropping lareferencialogtmp table");
String drop_lareferencialogtmp = "DROP TABLE IF EXISTS " +
ConnectDB.getUsageStatsDBSchema() +
".lareferencialogtmp";
stmt.executeUpdate(drop_lareferencialogtmp);
System.out.println("====> Dropped lareferencialogtmp table");
System.out.println("====> Creating lareferencialogtmp");
String create_lareferencialogtmp = "CREATE TABLE " +
ConnectDB.getUsageStatsDBSchema() + ".lareferencialogtmp(matomoid INT, " +
"source STRING, id_visit STRING, country STRING, action STRING, url STRING, entity_id STRING, " +
"source_item_type STRING, timestamp STRING, referrer_name STRING, agent STRING) " +
"clustered by (source, id_visit, action, timestamp, entity_id) into 100 buckets " +
"stored as orc tblproperties('transactional'='true')";
stmt.executeUpdate(create_lareferencialogtmp);
System.out.println("====> Created lareferencialogtmp");
System.out.println("====> Inserting into lareferencialogtmp");
String insert_lareferencialogtmp = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".lareferencialogtmp " +
"SELECT DISTINCT cast(idSite as INT) as matomoid, CONCAT('opendoar____::', " +
"actiondetail.customVariables.`2`.customVariablePageValue2) as source, idVisit as id_Visit, country, " +
"actiondetail.type as action, actiondetail.url as url, " +
"actiondetail.customVariables.`1`.`customVariablePageValue1` as entity_id, " +
"'repItem' as source_item_type, from_unixtime(cast(actiondetail.timestamp as BIGINT)) as timestamp, " +
"referrerName as referrer_name, browser as agent " +
"FROM " + ConnectDB.getUsageStatsDBSchema() + ".lareferencialogtmp_json " +
"LATERAL VIEW explode(actiondetails) actiondetailsTable AS actiondetail";
stmt.executeUpdate(insert_lareferencialogtmp);
System.out.println("====> Inserted into lareferencialogtmp");
stmt.close();
}
public void processlaReferenciaLogOld() throws Exception {

View File

@ -173,7 +173,7 @@ public class PiwikStatsDB {
log.info("repository process done");
System.out.println("====> Removing double clicks");
// removeDoubleClicks();
removeDoubleClicks();
System.out.println("====> Removing double clicks done");
log.info("removing double clicks done");
@ -183,7 +183,7 @@ public class PiwikStatsDB {
log.info("cleaning oai done");
System.out.println("====> ViewsStats processing starts");
// viewsStats();
viewsStats();
System.out.println("====> ViewsStats processing ends");
System.out.println("====> DownloadsStats processing starts");
@ -263,12 +263,12 @@ public class PiwikStatsDB {
stmt.executeUpdate(create_piwiklogtmp_json);
System.out.println("====> Created piwiklogtmp_json");
System.out.println("====> Droping piwiklogtmp table");
System.out.println("====> Dropping piwiklogtmp table");
String drop_piwiklogtmp = "DROP TABLE IF EXISTS " +
ConnectDB.getUsageStatsDBSchema() +
".piwiklogtmp";
stmt.executeUpdate(drop_piwiklogtmp);
System.out.println("====> Created piwiklogtmp_json");
System.out.println("====> Dropped piwiklogtmp");
System.out.println("====> Creating piwiklogtmp");
String create_piwiklogtmp = "CREATE TABLE " +