forked from D-Net/dnet-hadoop
More progress on processlaReferenciaLog
This commit is contained in:
parent
a2d64b4644
commit
3c11acde0c
|
@ -127,14 +127,14 @@ public class LaReferenciaStats {
|
|||
ConnectDB.getConnection().setAutoCommit(false);
|
||||
|
||||
System.out.println("====> Dropping lareferencialogtmp_json table");
|
||||
String drop_piwiklogtmp_json = "DROP TABLE IF EXISTS " +
|
||||
String drop_lareferencialogtmp_json = "DROP TABLE IF EXISTS " +
|
||||
ConnectDB.getUsageStatsDBSchema() +
|
||||
".lareferencialogtmp_json";
|
||||
stmt.executeUpdate(drop_piwiklogtmp_json);
|
||||
stmt.executeUpdate(drop_lareferencialogtmp_json);
|
||||
System.out.println("====> Dropped lareferencialogtmp_json table");
|
||||
|
||||
System.out.println("====> Creating lareferencialogtmp_json");
|
||||
String create_piwiklogtmp_json = "CREATE EXTERNAL TABLE IF NOT EXISTS " +
|
||||
String create_lareferencialogtmp_json = "CREATE EXTERNAL TABLE IF NOT EXISTS " +
|
||||
ConnectDB.getUsageStatsDBSchema() +
|
||||
".lareferencialogtmp_json(\n" +
|
||||
" `idSite` STRING,\n" +
|
||||
|
@ -162,8 +162,40 @@ public class LaReferenciaStats {
|
|||
"ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe'\n" +
|
||||
"LOCATION '" + UsageStatsExporter.lareferenciaLogPath + "'\n" +
|
||||
"TBLPROPERTIES (\"transactional\"=\"false\")";
|
||||
stmt.executeUpdate(create_piwiklogtmp_json);
|
||||
stmt.executeUpdate(create_lareferencialogtmp_json);
|
||||
System.out.println("====> Created lareferencialogtmp_json");
|
||||
|
||||
System.out.println("====> Dropping lareferencialogtmp table");
|
||||
String drop_lareferencialogtmp = "DROP TABLE IF EXISTS " +
|
||||
ConnectDB.getUsageStatsDBSchema() +
|
||||
".lareferencialogtmp";
|
||||
stmt.executeUpdate(drop_lareferencialogtmp);
|
||||
System.out.println("====> Dropped lareferencialogtmp table");
|
||||
|
||||
System.out.println("====> Creating lareferencialogtmp");
|
||||
String create_lareferencialogtmp = "CREATE TABLE " +
|
||||
ConnectDB.getUsageStatsDBSchema() + ".lareferencialogtmp(matomoid INT, " +
|
||||
"source STRING, id_visit STRING, country STRING, action STRING, url STRING, entity_id STRING, " +
|
||||
"source_item_type STRING, timestamp STRING, referrer_name STRING, agent STRING) " +
|
||||
"clustered by (source, id_visit, action, timestamp, entity_id) into 100 buckets " +
|
||||
"stored as orc tblproperties('transactional'='true')";
|
||||
stmt.executeUpdate(create_lareferencialogtmp);
|
||||
System.out.println("====> Created lareferencialogtmp");
|
||||
|
||||
System.out.println("====> Inserting into lareferencialogtmp");
|
||||
String insert_lareferencialogtmp = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".lareferencialogtmp " +
|
||||
"SELECT DISTINCT cast(idSite as INT) as matomoid, CONCAT('opendoar____::', " +
|
||||
"actiondetail.customVariables.`2`.customVariablePageValue2) as source, idVisit as id_Visit, country, " +
|
||||
"actiondetail.type as action, actiondetail.url as url, " +
|
||||
"actiondetail.customVariables.`1`.`customVariablePageValue1` as entity_id, " +
|
||||
"'repItem' as source_item_type, from_unixtime(cast(actiondetail.timestamp as BIGINT)) as timestamp, " +
|
||||
"referrerName as referrer_name, browser as agent " +
|
||||
"FROM " + ConnectDB.getUsageStatsDBSchema() + ".lareferencialogtmp_json " +
|
||||
"LATERAL VIEW explode(actiondetails) actiondetailsTable AS actiondetail";
|
||||
stmt.executeUpdate(insert_lareferencialogtmp);
|
||||
System.out.println("====> Inserted into lareferencialogtmp");
|
||||
|
||||
stmt.close();
|
||||
}
|
||||
|
||||
public void processlaReferenciaLogOld() throws Exception {
|
||||
|
|
|
@ -173,7 +173,7 @@ public class PiwikStatsDB {
|
|||
log.info("repository process done");
|
||||
|
||||
System.out.println("====> Removing double clicks");
|
||||
// removeDoubleClicks();
|
||||
removeDoubleClicks();
|
||||
System.out.println("====> Removing double clicks done");
|
||||
log.info("removing double clicks done");
|
||||
|
||||
|
@ -183,7 +183,7 @@ public class PiwikStatsDB {
|
|||
log.info("cleaning oai done");
|
||||
|
||||
System.out.println("====> ViewsStats processing starts");
|
||||
// viewsStats();
|
||||
viewsStats();
|
||||
System.out.println("====> ViewsStats processing ends");
|
||||
|
||||
System.out.println("====> DownloadsStats processing starts");
|
||||
|
@ -263,12 +263,12 @@ public class PiwikStatsDB {
|
|||
stmt.executeUpdate(create_piwiklogtmp_json);
|
||||
System.out.println("====> Created piwiklogtmp_json");
|
||||
|
||||
System.out.println("====> Droping piwiklogtmp table");
|
||||
System.out.println("====> Dropping piwiklogtmp table");
|
||||
String drop_piwiklogtmp = "DROP TABLE IF EXISTS " +
|
||||
ConnectDB.getUsageStatsDBSchema() +
|
||||
".piwiklogtmp";
|
||||
stmt.executeUpdate(drop_piwiklogtmp);
|
||||
System.out.println("====> Created piwiklogtmp_json");
|
||||
System.out.println("====> Dropped piwiklogtmp");
|
||||
|
||||
System.out.println("====> Creating piwiklogtmp");
|
||||
String create_piwiklogtmp = "CREATE TABLE " +
|
||||
|
|
Loading…
Reference in New Issue