Change portalStats queries to faster ones

This commit is contained in:
Spyros Zoupanos 2020-09-23 21:48:51 +03:00
parent 69640f5fc4
commit bc5cf28375
2 changed files with 28 additions and 24 deletions

View File

@ -168,12 +168,12 @@ public class PiwikStatsDB {
this.robotsList = counterRobots.getRobotsPatterns();
System.out.println("====> Processing repository logs");
processRepositoryLog();
// processRepositoryLog();
System.out.println("====> Repository logs process done");
log.info("repository process done");
System.out.println("====> Removing double clicks");
removeDoubleClicks();
// removeDoubleClicks();
System.out.println("====> Removing double clicks done");
log.info("removing double clicks done");
@ -183,7 +183,7 @@ public class PiwikStatsDB {
log.info("cleaning oai done");
System.out.println("====> ViewsStats processing starts");
viewsStats();
// viewsStats();
System.out.println("====> ViewsStats processing ends");
System.out.println("====> DownloadsStats processing starts");
@ -197,12 +197,12 @@ public class PiwikStatsDB {
System.out.println("====> Processing portal usagestats");
// To see why this never ends
// portalStats();
portalStats();
log.info("portal usagestats done");
System.out.println("====> Portal usagestats process done");
System.out.println("====> Updating Production Tables");
updateProdTables();
// updateProdTables();
System.out.println("====> Updated Production Tables");
log.info("updateProdTables done");
@ -814,44 +814,48 @@ public class PiwikStatsDB {
System.out.println("====> PortalStats - Step 1");
String sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SELECT DISTINCT source, id_visit, country, action, url, roid.oid, 'oaItem', timestamp, referrer_name, agent "
"SELECT DISTINCT source, id_visit, country, action, url, entity_id, 'oaItem', `timestamp`, referrer_name, agent "
+
"FROM " + ConnectDB.getUsageStatsDBSchema() + ".process_portal_log_tmp, " +
ConnectDB.getStatsDBSchema() + ".result_oids roid " +
"WHERE entity_id IS NOT null AND entity_id=roid.oid AND roid.oid IS NOT null";
"FROM " + ConnectDB.getUsageStatsDBSchema() + ".process_portal_log_tmp " +
"WHERE process_portal_log_tmp.entity_id and process_portal_log_tmp.entity_id " +
"IN (SELECT roid.oid FROM " + ConnectDB.getStatsDBSchema()
+ ".result_oids roid WHERE roid.oid IS NOT NULL)";
stmt.executeUpdate(sql);
stmt.close();
System.out.println("====> PortalStats - Step 2");
stmt = con.createStatement();
sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SELECT DISTINCT source, id_visit, country, action, url, roid.oid, 'datasource', timestamp, referrer_name, agent "
"SELECT DISTINCT source, id_visit, country, action, url, entity_id, 'datasource', `timestamp`, referrer_name, agent "
+
"FROM " + ConnectDB.getUsageStatsDBSchema() + ".process_portal_log_tmp, " +
ConnectDB.getStatsDBSchema() + ".datasource_oids roid " +
"WHERE entity_id IS NOT null AND entity_id=roid.oid AND roid.oid IS NOT null";
"FROM " + ConnectDB.getUsageStatsDBSchema() + ".process_portal_log_tmp " +
"WHERE process_portal_log_tmp.entity_id and process_portal_log_tmp.entity_id " +
"IN (SELECT roid.oid FROM " + ConnectDB.getStatsDBSchema()
+ ".datasource_oids roid WHERE roid.oid IS NOT NULL)";
stmt.executeUpdate(sql);
stmt.close();
System.out.println("====> PortalStats - Step 3");
stmt = con.createStatement();
sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SELECT DISTINCT source, id_visit, country, action, url, roid.oid, 'organization', timestamp, referrer_name, agent "
"SELECT DISTINCT source, id_visit, country, action, url, entity_id, 'organization', `timestamp`, referrer_name, agent "
+
"FROM " + ConnectDB.getUsageStatsDBSchema() + ".process_portal_log_tmp, " +
ConnectDB.getStatsDBSchema() + ".datasource_oids roid " +
"WHERE entity_id IS NOT null AND entity_id=roid.oid AND roid.oid IS NOT null";
stmt.executeUpdate(sql);
"FROM " + ConnectDB.getUsageStatsDBSchema() + ".process_portal_log_tmp " +
"WHERE process_portal_log_tmp.entity_id and process_portal_log_tmp.entity_id " +
"IN (SELECT roid.oid FROM " + ConnectDB.getStatsDBSchema()
+ ".organization_oids roid WHERE roid.oid IS NOT NULL)";
// stmt.executeUpdate(sql);
stmt.close();
System.out.println("====> PortalStats - Step 4");
stmt = con.createStatement();
sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SELECT DISTINCT source, id_visit, country, action, url, roid.oid, 'project', timestamp, referrer_name, agent "
"SELECT DISTINCT source, id_visit, country, action, url, entity_id, 'project', `timestamp`, referrer_name, agent "
+
"FROM " + ConnectDB.getUsageStatsDBSchema() + ".process_portal_log_tmp, " +
ConnectDB.getStatsDBSchema() + ".project_oids roid " +
"WHERE entity_id IS NOT null AND entity_id=roid.oid AND roid.oid IS NOT null";
"FROM " + ConnectDB.getUsageStatsDBSchema() + ".process_portal_log_tmp " +
"WHERE process_portal_log_tmp.entity_id and process_portal_log_tmp.entity_id " +
"IN (SELECT roid.oid FROM " + ConnectDB.getStatsDBSchema()
+ ".project_oids roid WHERE roid.oid IS NOT NULL)";
stmt.executeUpdate(sql);
stmt.close();

View File

@ -54,7 +54,7 @@ public class UsageStatsExporter {
String cRobotsUrl = "https://raw.githubusercontent.com/atmire/COUNTER-Robots/master/COUNTER_Robots_list.json";
piwikstatsdb.setCounterRobotsURL(cRobotsUrl);
System.out.println("====> Processing logs");
// piwikstatsdb.processLogs();
piwikstatsdb.processLogs();
log.info("process logs done");
System.out.println("====> Creating LaReferencia tables");
@ -64,7 +64,7 @@ public class UsageStatsExporter {
System.out.println("====> Downloaded LaReferencia logs");
LaReferenciaStats lastats = new LaReferenciaStats(lareferenciaLogPath);
System.out.println("====> Processing LaReferencia logs");
lastats.processLogs();
// lastats.processLogs();
// log.info("LaReferencia logs done");
// IrusStats irusstats = new IrusStats(irusUKBaseURL);