forked from D-Net/dnet-hadoop
Changing portalStats queries to faster ones
This commit is contained in:
parent
69640f5fc4
commit
bc5cf28375
|
@ -168,12 +168,12 @@ public class PiwikStatsDB {
|
|||
this.robotsList = counterRobots.getRobotsPatterns();
|
||||
|
||||
System.out.println("====> Processing repository logs");
|
||||
processRepositoryLog();
|
||||
// processRepositoryLog();
|
||||
System.out.println("====> Repository logs process done");
|
||||
log.info("repository process done");
|
||||
|
||||
System.out.println("====> Removing double clicks");
|
||||
removeDoubleClicks();
|
||||
// removeDoubleClicks();
|
||||
System.out.println("====> Removing double clicks done");
|
||||
log.info("removing double clicks done");
|
||||
|
||||
|
@ -183,7 +183,7 @@ public class PiwikStatsDB {
|
|||
log.info("cleaning oai done");
|
||||
|
||||
System.out.println("====> ViewsStats processing starts");
|
||||
viewsStats();
|
||||
// viewsStats();
|
||||
System.out.println("====> ViewsStats processing ends");
|
||||
|
||||
System.out.println("====> DownloadsStats processing starts");
|
||||
|
@ -197,12 +197,12 @@ public class PiwikStatsDB {
|
|||
|
||||
System.out.println("====> Processing portal usagestats");
|
||||
// To see why this never ends
|
||||
// portalStats();
|
||||
portalStats();
|
||||
log.info("portal usagestats done");
|
||||
System.out.println("====> Portal usagestats process done");
|
||||
|
||||
System.out.println("====> Updating Production Tables");
|
||||
updateProdTables();
|
||||
// updateProdTables();
|
||||
System.out.println("====> Updated Production Tables");
|
||||
log.info("updateProdTables done");
|
||||
|
||||
|
@ -814,44 +814,48 @@ public class PiwikStatsDB {
|
|||
|
||||
System.out.println("====> PortalStats - Step 1");
|
||||
String sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
|
||||
"SELECT DISTINCT source, id_visit, country, action, url, roid.oid, 'oaItem', timestamp, referrer_name, agent "
|
||||
"SELECT DISTINCT source, id_visit, country, action, url, entity_id, 'oaItem', `timestamp`, referrer_name, agent "
|
||||
+
|
||||
"FROM " + ConnectDB.getUsageStatsDBSchema() + ".process_portal_log_tmp, " +
|
||||
ConnectDB.getStatsDBSchema() + ".result_oids roid " +
|
||||
"WHERE entity_id IS NOT null AND entity_id=roid.oid AND roid.oid IS NOT null";
|
||||
"FROM " + ConnectDB.getUsageStatsDBSchema() + ".process_portal_log_tmp " +
|
||||
"WHERE process_portal_log_tmp.entity_id and process_portal_log_tmp.entity_id " +
|
||||
"IN (SELECT roid.oid FROM " + ConnectDB.getStatsDBSchema()
|
||||
+ ".result_oids roid WHERE roid.oid IS NOT NULL)";
|
||||
stmt.executeUpdate(sql);
|
||||
stmt.close();
|
||||
|
||||
System.out.println("====> PortalStats - Step 2");
|
||||
stmt = con.createStatement();
|
||||
sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
|
||||
"SELECT DISTINCT source, id_visit, country, action, url, roid.oid, 'datasource', timestamp, referrer_name, agent "
|
||||
"SELECT DISTINCT source, id_visit, country, action, url, entity_id, 'datasource', `timestamp`, referrer_name, agent "
|
||||
+
|
||||
"FROM " + ConnectDB.getUsageStatsDBSchema() + ".process_portal_log_tmp, " +
|
||||
ConnectDB.getStatsDBSchema() + ".datasource_oids roid " +
|
||||
"WHERE entity_id IS NOT null AND entity_id=roid.oid AND roid.oid IS NOT null";
|
||||
"FROM " + ConnectDB.getUsageStatsDBSchema() + ".process_portal_log_tmp " +
|
||||
"WHERE process_portal_log_tmp.entity_id and process_portal_log_tmp.entity_id " +
|
||||
"IN (SELECT roid.oid FROM " + ConnectDB.getStatsDBSchema()
|
||||
+ ".datasource_oids roid WHERE roid.oid IS NOT NULL)";
|
||||
stmt.executeUpdate(sql);
|
||||
stmt.close();
|
||||
|
||||
System.out.println("====> PortalStats - Step 3");
|
||||
stmt = con.createStatement();
|
||||
sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
|
||||
"SELECT DISTINCT source, id_visit, country, action, url, roid.oid, 'organization', timestamp, referrer_name, agent "
|
||||
"SELECT DISTINCT source, id_visit, country, action, url, entity_id, 'organization', `timestamp`, referrer_name, agent "
|
||||
+
|
||||
"FROM " + ConnectDB.getUsageStatsDBSchema() + ".process_portal_log_tmp, " +
|
||||
ConnectDB.getStatsDBSchema() + ".datasource_oids roid " +
|
||||
"WHERE entity_id IS NOT null AND entity_id=roid.oid AND roid.oid IS NOT null";
|
||||
stmt.executeUpdate(sql);
|
||||
"FROM " + ConnectDB.getUsageStatsDBSchema() + ".process_portal_log_tmp " +
|
||||
"WHERE process_portal_log_tmp.entity_id and process_portal_log_tmp.entity_id " +
|
||||
"IN (SELECT roid.oid FROM " + ConnectDB.getStatsDBSchema()
|
||||
+ ".organization_oids roid WHERE roid.oid IS NOT NULL)";
|
||||
// stmt.executeUpdate(sql);
|
||||
stmt.close();
|
||||
|
||||
System.out.println("====> PortalStats - Step 4");
|
||||
stmt = con.createStatement();
|
||||
sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
|
||||
"SELECT DISTINCT source, id_visit, country, action, url, roid.oid, 'project', timestamp, referrer_name, agent "
|
||||
"SELECT DISTINCT source, id_visit, country, action, url, entity_id, 'project', `timestamp`, referrer_name, agent "
|
||||
+
|
||||
"FROM " + ConnectDB.getUsageStatsDBSchema() + ".process_portal_log_tmp, " +
|
||||
ConnectDB.getStatsDBSchema() + ".project_oids roid " +
|
||||
"WHERE entity_id IS NOT null AND entity_id=roid.oid AND roid.oid IS NOT null";
|
||||
"FROM " + ConnectDB.getUsageStatsDBSchema() + ".process_portal_log_tmp " +
|
||||
"WHERE process_portal_log_tmp.entity_id and process_portal_log_tmp.entity_id " +
|
||||
"IN (SELECT roid.oid FROM " + ConnectDB.getStatsDBSchema()
|
||||
+ ".project_oids roid WHERE roid.oid IS NOT NULL)";
|
||||
stmt.executeUpdate(sql);
|
||||
stmt.close();
|
||||
|
||||
|
|
|
@ -54,7 +54,7 @@ public class UsageStatsExporter {
|
|||
String cRobotsUrl = "https://raw.githubusercontent.com/atmire/COUNTER-Robots/master/COUNTER_Robots_list.json";
|
||||
piwikstatsdb.setCounterRobotsURL(cRobotsUrl);
|
||||
System.out.println("====> Processing logs");
|
||||
// piwikstatsdb.processLogs();
|
||||
piwikstatsdb.processLogs();
|
||||
log.info("process logs done");
|
||||
|
||||
System.out.println("====> Creating LaReferencia tables");
|
||||
|
@ -64,7 +64,7 @@ public class UsageStatsExporter {
|
|||
System.out.println("====> Downloaded LaReferencia logs");
|
||||
LaReferenciaStats lastats = new LaReferenciaStats(lareferenciaLogPath);
|
||||
System.out.println("====> Processing LaReferencia logs");
|
||||
lastats.processLogs();
|
||||
// lastats.processLogs();
|
||||
// log.info("LaReferencia logs done");
|
||||
|
||||
// IrusStats irusstats = new IrusStats(irusUKBaseURL);
|
||||
|
|
Loading…
Reference in New Issue