lareferencia removeDoubleClicks done

This commit is contained in:
Spyros Zoupanos 2020-09-21 22:27:15 +03:00
parent 0369f36776
commit 65acece7c4
2 changed files with 32 additions and 91 deletions

View File

@@ -104,12 +104,15 @@ public class LaReferenciaStats {
public void processLogs() throws Exception { public void processLogs() throws Exception {
try { try {
System.out.println("====> Processing LaReferencia repository logs"); System.out.println("====> Processing LaReferencia repository logs");
processlaReferenciaLog(); // processlaReferenciaLog();
System.out.println("====> LaReferencia repository logs process done"); System.out.println("====> LaReferencia repository logs process done");
log.info("LaReferencia repository process done"); log.info("LaReferencia repository process done");
System.out.println("====> LaReferencia removing double clicks");
// removeDoubleClicks(); // removeDoubleClicks();
// log.info("LaReferencia removing double clicks done"); System.out.println("====> LaReferencia removed double clicks");
// viewsStats(); log.info("LaReferencia removing double clicks done");
viewsStats();
// log.info("LaReferencia views done"); // log.info("LaReferencia views done");
// downloadsStats(); // downloadsStats();
// log.info("LaReferencia downloads done"); // log.info("LaReferencia downloads done");
@@ -202,105 +205,43 @@ public class LaReferenciaStats {
stmt.close(); stmt.close();
} }
public void processlaReferenciaLogOld() throws Exception {
Statement stmt = ConnectDB.getConnection().createStatement();
ConnectDB.getConnection().setAutoCommit(false);
ArrayList<String> jsonFiles = listHdfsDir(this.logRepoPath);
// File dir = new File(this.logRepoPath);
// File[] jsonFiles = dir.listFiles();
PreparedStatement prepStatem = ConnectDB
.getConnection()
.prepareStatement(
"INSERT INTO lareferencialogtmp (matomoid, source, id_visit, country, action, url, entity_id, source_item_type, timestamp, referrer_name, agent) VALUES (?,?,?,?,?,?,?,?,?,?,?)");
int batch_size = 0;
JSONParser parser = new JSONParser();
for (String jsonFile : jsonFiles) {
System.out.println(jsonFile);
JSONArray jsonArray = (JSONArray) parser.parse(readHDFSFile(jsonFile));
for (Object aJsonArray : jsonArray) {
JSONObject jsonObjectRow = (JSONObject) aJsonArray;
int idSite = Integer.parseInt(jsonObjectRow.get("idSite").toString());
String idVisit = jsonObjectRow.get("idVisit").toString();
String country = jsonObjectRow.get("country").toString();
String referrerName = jsonObjectRow.get("referrerName").toString();
String agent = jsonObjectRow.get("browser").toString();
String sourceItemType = "repItem";
JSONArray actionDetails = (JSONArray) jsonObjectRow.get(("actionDetails"));
for (Object actionDetail : actionDetails) {
JSONObject actionDetailsObj = (JSONObject) actionDetail;
if (actionDetailsObj.get("customVariables") != null) {
SimpleDateFormat simpleDateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
simpleDateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
Timestamp timestamp = new Timestamp(
Long.parseLong(actionDetailsObj.get("timestamp").toString()) * 1000);
String url = actionDetailsObj.get("url").toString();
String oaipmh = ((JSONObject) ((JSONObject) actionDetailsObj.get("customVariables")).get("1"))
.get("customVariablePageValue1")
.toString();
String opendoar = ((JSONObject) ((JSONObject) actionDetailsObj.get("customVariables")).get("2"))
.get("customVariablePageValue2")
.toString();
String action = actionDetailsObj.get("type").toString();
prepStatem.setInt(1, idSite);
prepStatem.setString(2, "opendoar____::" + opendoar);
prepStatem.setString(3, idVisit);
prepStatem.setString(4, country);
prepStatem.setString(5, action);
prepStatem.setString(6, url);
prepStatem.setString(7, oaipmh);
prepStatem.setString(8, sourceItemType);
prepStatem.setString(9, simpleDateFormat.format(timestamp));
prepStatem.setString(10, referrerName);
prepStatem.setString(11, agent);
// prepStatem.setString(11, );
prepStatem.addBatch();
batch_size++;
if (batch_size == 10000) {
prepStatem.executeBatch();
ConnectDB.getConnection().commit();
batch_size = 0;
}
}
}
}
}
try {
prepStatem.executeBatch();
ConnectDB.getConnection().commit();
stmt.close();
} catch (Exception e) {
if (e instanceof java.sql.SQLException) {
java.sql.SQLException ne = ((java.sql.SQLException) e).getNextException();
System.out.println(ne.getMessage());
}
}
}
public void removeDoubleClicks() throws Exception { public void removeDoubleClicks() throws Exception {
Statement stmt = ConnectDB.getConnection().createStatement(); Statement stmt = ConnectDB.getConnection().createStatement();
ConnectDB.getConnection().setAutoCommit(false); ConnectDB.getConnection().setAutoCommit(false);
System.out.println("====> Cleaning download double clicks");
// clean download double clicks // clean download double clicks
String sql = "DELETE FROM lareferencialogtmp p WHERE EXISTS (SELECT DISTINCT p1.source, p1.id_visit, p1.action, p1.entity_id, p1.timestamp FROM lareferencialogtmp p1, lareferencialogtmp p2 WHERE p1.source=p2.source AND p1.id_visit=p2.id_visit AND p1.entity_id=p2.entity_id AND p1.action=p2.action AND p1.action='download' AND p1.timestamp!=p2.timestamp AND p1.timestamp<p2.timestamp AND extract(EPOCH FROM p2.timestamp::timestamp-p1.timestamp::timestamp)<30 AND p.source=p1.source AND p.id_visit=p1.id_visit AND p.action=p1.action AND p.entity_id=p1.entity_id AND p.timestamp=p1.timestamp);"; String sql = "DELETE from " + ConnectDB.getUsageStatsDBSchema() + ".lareferencialogtmp WHERE EXISTS (" +
"SELECT DISTINCT p1.source, p1.id_visit, p1.action, p1.entity_id, p1.timestamp " +
"FROM " + ConnectDB.getUsageStatsDBSchema() + ".lareferencialogtmp p1, " +
ConnectDB.getUsageStatsDBSchema() + ".lareferencialogtmp p2 " +
"WHERE p1.source=p2.source AND p1.id_visit=p2.id_visit AND p1.entity_id=p2.entity_id " +
"AND p1.action=p2.action AND p1.action='download' AND p1.timestamp!=p2.timestamp " +
"AND p1.timestamp<p2.timestamp AND ((unix_timestamp(p2.timestamp)-unix_timestamp(p1.timestamp))/60)<30 " +
"AND lareferencialogtmp.source=p1.source AND lareferencialogtmp.id_visit=p1.id_visit " +
"AND lareferencialogtmp.action=p1.action AND lareferencialogtmp.entity_id=p1.entity_id " +
"AND lareferencialogtmp.timestamp=p1.timestamp)";
stmt.executeUpdate(sql); stmt.executeUpdate(sql);
stmt.close(); stmt.close();
ConnectDB.getConnection().commit(); System.out.println("====> Cleaned download double clicks");
stmt = ConnectDB.getConnection().createStatement(); stmt = ConnectDB.getConnection().createStatement();
System.out.println("====> Cleaning action double clicks");
// clean view double clicks // clean view double clicks
sql = "DELETE FROM lareferencialogtmp p WHERE EXISTS (SELECT DISTINCT p1.source, p1.id_visit, p1.action, p1.entity_id, p1.timestamp from lareferencialogtmp p1, lareferencialogtmp p2 WHERE p1.source=p2.source AND p1.id_visit=p2.id_visit AND p1.entity_id=p2.entity_id AND p1.action=p2.action AND p1.action='action' AND p1.timestamp!=p2.timestamp AND p1.timestamp<p2.timestamp AND extract(EPOCH FROM p2.timestamp::timestamp-p1.timestamp::timestamp)<10 AND p.source=p1.source AND p.id_visit=p1.id_visit AND p.action=p1.action AND p.entity_id=p1.entity_id AND p.timestamp=p1.timestamp);"; sql = "DELETE from " + ConnectDB.getUsageStatsDBSchema() + ".lareferencialogtmp WHERE EXISTS (" +
"SELECT DISTINCT p1.source, p1.id_visit, p1.action, p1.entity_id, p1.timestamp " +
"FROM " + ConnectDB.getUsageStatsDBSchema() + ".lareferencialogtmp p1, " +
ConnectDB.getUsageStatsDBSchema() + ".lareferencialogtmp p2 " +
"WHERE p1.source=p2.source AND p1.id_visit=p2.id_visit AND p1.entity_id=p2.entity_id " +
"AND p1.action=p2.action AND p1.action='action' AND p1.timestamp!=p2.timestamp " +
"AND p1.timestamp<p2.timestamp AND ((unix_timestamp(p2.timestamp)-unix_timestamp(p1.timestamp))/60)<10 " +
"AND lareferencialogtmp.source=p1.source AND lareferencialogtmp.id_visit=p1.id_visit " +
"AND lareferencialogtmp.action=p1.action AND lareferencialogtmp.entity_id=p1.entity_id " +
"AND lareferencialogtmp.timestamp=p1.timestamp)";
stmt.executeUpdate(sql); stmt.executeUpdate(sql);
stmt.close(); stmt.close();
ConnectDB.getConnection().commit(); System.out.println("====> Cleaned action double clicks");
// conn.close(); // conn.close();
} }

View File

@@ -319,7 +319,7 @@ public class PiwikStatsDB {
System.out.println("====> Cleaned download double clicks"); System.out.println("====> Cleaned download double clicks");
// clean view double clicks // clean view double clicks
System.out.println("====> Cleaning view double clicks"); System.out.println("====> Cleaning action double clicks");
sql = "DELETE from " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " + sql = "DELETE from " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"WHERE EXISTS (\n" + "WHERE EXISTS (\n" +
"SELECT DISTINCT p1.source, p1.id_visit, p1.action, p1.entity_id, p1.timestamp \n" + "SELECT DISTINCT p1.source, p1.id_visit, p1.action, p1.entity_id, p1.timestamp \n" +
@@ -332,7 +332,7 @@ public class PiwikStatsDB {
"AND piwiklogtmp.source=p1.source AND piwiklogtmp.id_visit=p1.id_visit \n" + "AND piwiklogtmp.source=p1.source AND piwiklogtmp.id_visit=p1.id_visit \n" +
"AND piwiklogtmp.action=p1.action AND piwiklogtmp.entity_id=p1.entity_id AND piwiklogtmp.timestamp=p1.timestamp)"; "AND piwiklogtmp.action=p1.action AND piwiklogtmp.entity_id=p1.entity_id AND piwiklogtmp.timestamp=p1.timestamp)";
stmt.executeUpdate(sql); stmt.executeUpdate(sql);
System.out.println("====> Cleaned view double clicks"); System.out.println("====> Cleaned action double clicks");
stmt.close(); stmt.close();
} }