processLogs.updateProdTables. I need feedback for processLogs.portalStats to see why they never end

This commit is contained in:
Spyros Zoupanos 2020-09-13 12:23:03 +03:00
parent 9caac3e3e3
commit f8e91cdc5c
3 changed files with 44 additions and 41 deletions

View File

@ -43,7 +43,7 @@ public abstract class ConnectDB {
// Class.forName(properties.getProperty("Stats_db_Driver")); // Class.forName(properties.getProperty("Stats_db_Driver"));
dbURL = "jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000/;UseNativeQuery=1"; dbURL = "jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000/;UseNativeQuery=1";
usageStatsDBSchema = "usagestats_20200907"; usageStatsDBSchema = "usagestats_20200913";
statsDBSchema = "openaire_prod_stats_shadow_20200821"; statsDBSchema = "openaire_prod_stats_shadow_20200821";
Class.forName("org.apache.hive.jdbc.HiveDriver"); Class.forName("org.apache.hive.jdbc.HiveDriver");

View File

@ -43,7 +43,7 @@ public class PiwikStatsDB {
this.createTables(); this.createTables();
// The piwiklog table is not needed since it is built // The piwiklog table is not needed since it is built
// on top of JSON files // on top of JSON files
// this.createTmpTables(); this.createTmpTables();
} }
public void foo() { public void foo() {
@ -168,7 +168,7 @@ public class PiwikStatsDB {
this.robotsList = counterRobots.getRobotsPatterns(); this.robotsList = counterRobots.getRobotsPatterns();
System.out.println("====> Processing repository logs"); System.out.println("====> Processing repository logs");
processRepositoryLog(); // processRepositoryLog();
System.out.println("====> Repository logs process done"); System.out.println("====> Repository logs process done");
log.info("repository process done"); log.info("repository process done");
@ -196,13 +196,14 @@ public class PiwikStatsDB {
log.info("portal process done"); log.info("portal process done");
System.out.println("====> Processing portal usagestats"); System.out.println("====> Processing portal usagestats");
portalStats(); // To see why this never ends
// portalStats();
log.info("portal usagestats done"); log.info("portal usagestats done");
System.out.println("====> Portal usagestats process done"); System.out.println("====> Portal usagestats process done");
System.exit(0); System.out.println("====> Updating Production Tables");
// updateProdTables();
updateProdTables(); System.out.println("====> Updated Production Tables");
log.info("updateProdTables done"); log.info("updateProdTables done");
} catch (Exception e) { } catch (Exception e) {
@ -460,7 +461,7 @@ public class PiwikStatsDB {
stmt.executeUpdate(drop_views_stats); stmt.executeUpdate(drop_views_stats);
System.out.println("====> Dropped downloads_stats_tmp table"); System.out.println("====> Dropped downloads_stats_tmp table");
System.out.println("====> Creating downloads_stats_tmp view"); System.out.println("====> Creating downloads_stats_tmp table");
sql = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".downloads_stats_tmp AS " + sql = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".downloads_stats_tmp AS " +
"SELECT 'OpenAIRE' as source, d.id as repository_id, ro.id as result_id, month as date, " + "SELECT 'OpenAIRE' as source, d.id as repository_id, ro.id as result_id, month as date, " +
"max(downloads) AS count, max(openaire_referrer) AS openaire " + "max(downloads) AS count, max(openaire_referrer) AS openaire " +
@ -469,7 +470,7 @@ public class PiwikStatsDB {
"WHERE p.source=d.piwik_id and p.id=ro.oid " + "WHERE p.source=d.piwik_id and p.id=ro.oid " +
"GROUP BY d.id, ro.id, month " + "GROUP BY d.id, ro.id, month " +
"ORDER BY d.id, ro.id, month"; "ORDER BY d.id, ro.id, month";
System.out.println("====> Created downloads_stats_tmp view"); System.out.println("====> Created downloads_stats_tmp table");
stmt.executeUpdate(sql); stmt.executeUpdate(sql);
System.out.println("====> Dropping downloads_stats table"); System.out.println("====> Dropping downloads_stats table");
@ -486,15 +487,13 @@ public class PiwikStatsDB {
stmt.executeUpdate(create_pageviews_stats); stmt.executeUpdate(create_pageviews_stats);
System.out.println("====> Created downloads_stats table"); System.out.println("====> Created downloads_stats table");
System.out.println("====> Dropping pageviews_stats table"); System.out.println("====> Dropping result_downloads_monthly_tmp view");
sql = "DROP VIEW IF EXISTS result_downloads_monthly_tmp"; sql = "DROP VIEW IF EXISTS result_downloads_monthly_tmp";
System.out.println("====> Dropped pageviews_stats table"); System.out.println("====> Dropped result_downloads_monthly_tmp view");
stmt.executeUpdate(sql); stmt.executeUpdate(sql);
stmt.close(); stmt.close();
ConnectDB.getConnection().close(); ConnectDB.getConnection().close();
System.exit(0);
} }
public void finalizeStats() throws Exception { public void finalizeStats() throws Exception {
@ -811,7 +810,8 @@ public class PiwikStatsDB {
System.out.println("====> PortalStats - Step 1"); System.out.println("====> PortalStats - Step 1");
String sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " + String sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SELECT DISTINCT source, id_visit, country, action, url, roid.oid, 'oaItem', timestamp, referrer_name, agent " + "SELECT DISTINCT source, id_visit, country, action, url, roid.oid, 'oaItem', timestamp, referrer_name, agent "
+
"FROM " + ConnectDB.getUsageStatsDBSchema() + ".process_portal_log_tmp, " + "FROM " + ConnectDB.getUsageStatsDBSchema() + ".process_portal_log_tmp, " +
ConnectDB.getStatsDBSchema() + ".result_oids roid " + ConnectDB.getStatsDBSchema() + ".result_oids roid " +
"WHERE entity_id IS NOT null AND entity_id=roid.oid AND roid.oid IS NOT null"; "WHERE entity_id IS NOT null AND entity_id=roid.oid AND roid.oid IS NOT null";
@ -821,7 +821,8 @@ public class PiwikStatsDB {
System.out.println("====> PortalStats - Step 2"); System.out.println("====> PortalStats - Step 2");
stmt = con.createStatement(); stmt = con.createStatement();
sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " + sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SELECT DISTINCT source, id_visit, country, action, url, roid.oid, 'datasource', timestamp, referrer_name, agent " + "SELECT DISTINCT source, id_visit, country, action, url, roid.oid, 'datasource', timestamp, referrer_name, agent "
+
"FROM " + ConnectDB.getUsageStatsDBSchema() + ".process_portal_log_tmp, " + "FROM " + ConnectDB.getUsageStatsDBSchema() + ".process_portal_log_tmp, " +
ConnectDB.getStatsDBSchema() + ".datasource_oids roid " + ConnectDB.getStatsDBSchema() + ".datasource_oids roid " +
"WHERE entity_id IS NOT null AND entity_id=roid.oid AND roid.oid IS NOT null"; "WHERE entity_id IS NOT null AND entity_id=roid.oid AND roid.oid IS NOT null";
@ -831,7 +832,8 @@ public class PiwikStatsDB {
System.out.println("====> PortalStats - Step 3"); System.out.println("====> PortalStats - Step 3");
stmt = con.createStatement(); stmt = con.createStatement();
sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " + sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SELECT DISTINCT source, id_visit, country, action, url, roid.oid, 'organization', timestamp, referrer_name, agent " + "SELECT DISTINCT source, id_visit, country, action, url, roid.oid, 'organization', timestamp, referrer_name, agent "
+
"FROM " + ConnectDB.getUsageStatsDBSchema() + ".process_portal_log_tmp, " + "FROM " + ConnectDB.getUsageStatsDBSchema() + ".process_portal_log_tmp, " +
ConnectDB.getStatsDBSchema() + ".datasource_oids roid " + ConnectDB.getStatsDBSchema() + ".datasource_oids roid " +
"WHERE entity_id IS NOT null AND entity_id=roid.oid AND roid.oid IS NOT null"; "WHERE entity_id IS NOT null AND entity_id=roid.oid AND roid.oid IS NOT null";
@ -841,7 +843,8 @@ public class PiwikStatsDB {
System.out.println("====> PortalStats - Step 4"); System.out.println("====> PortalStats - Step 4");
stmt = con.createStatement(); stmt = con.createStatement();
sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " + sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SELECT DISTINCT source, id_visit, country, action, url, roid.oid, 'project', timestamp, referrer_name, agent " + "SELECT DISTINCT source, id_visit, country, action, url, roid.oid, 'project', timestamp, referrer_name, agent "
+
"FROM " + ConnectDB.getUsageStatsDBSchema() + ".process_portal_log_tmp, " + "FROM " + ConnectDB.getUsageStatsDBSchema() + ".process_portal_log_tmp, " +
ConnectDB.getStatsDBSchema() + ".project_oids roid " + ConnectDB.getStatsDBSchema() + ".project_oids roid " +
"WHERE entity_id IS NOT null AND entity_id=roid.oid AND roid.oid IS NOT null"; "WHERE entity_id IS NOT null AND entity_id=roid.oid AND roid.oid IS NOT null";
@ -1133,45 +1136,43 @@ public class PiwikStatsDB {
Statement stmt = ConnectDB.getConnection().createStatement(); Statement stmt = ConnectDB.getConnection().createStatement();
ConnectDB.getConnection().setAutoCommit(false); ConnectDB.getConnection().setAutoCommit(false);
// String sql = "insert into piwiklog select * from piwiklogtmp;"; System.out.println("====> Inserting data to piwiklog");
String sql = String sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".piwiklog " +
"INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".piwiklog " +
"SELECT * FROM " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp"; "SELECT * FROM " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp";
stmt.executeUpdate(sql); stmt.executeUpdate(sql);
// sql = "insert into views_stats select * from views_stats_tmp;"; System.out.println("====> Inserting data to views_stats");
sql = sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".views_stats " +
"INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".views_stats " +
"SELECT * FROM " + ConnectDB.getUsageStatsDBSchema() + ".views_stats_tmp"; "SELECT * FROM " + ConnectDB.getUsageStatsDBSchema() + ".views_stats_tmp";
stmt.executeUpdate(sql); stmt.executeUpdate(sql);
// sql = "insert into downloads_stats select * from downloads_stats_tmp;"; System.out.println("====> Inserting data to downloads_stats");
sql = sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".downloads_stats " +
"INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".downloads_stats " +
"SELECT * FROM " + ConnectDB.getUsageStatsDBSchema() + ".downloads_stats_tmp"; "SELECT * FROM " + ConnectDB.getUsageStatsDBSchema() + ".downloads_stats_tmp";
stmt.executeUpdate(sql); stmt.executeUpdate(sql);
// sql = "insert into pageviews_stats select * from pageviews_stats_tmp;"; System.out.println("====> Inserting data to pageviews_stats");
sql = sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".pageviews_stats " +
"INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".pageviews_stats " +
"SELECT * FROM " + ConnectDB.getUsageStatsDBSchema() + ".pageviews_stats_tmp"; "SELECT * FROM " + ConnectDB.getUsageStatsDBSchema() + ".pageviews_stats_tmp";
stmt.executeUpdate(sql); stmt.executeUpdate(sql);
// sql = "DROP TABLE IF EXISTS views_stats_tmp;"; System.out.println("====> Dropping table views_stats_tmp");
sql = ""; sql = "DROP TABLE IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".views_stats_tmp";
stmt.executeUpdate(sql); stmt.executeUpdate(sql);
sql = "DROP TABLE IF EXISTS downloads_stats_tmp;"; System.out.println("====> Dropping table downloads_stats_tmp");
sql = "DROP TABLE IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".downloads_stats_tmp";
stmt.executeUpdate(sql); stmt.executeUpdate(sql);
sql = "DROP TABLE IF EXISTS pageviews_stats_tmp;"; System.out.println("====> Dropping table pageviews_stats_tmp");
sql = "DROP TABLE IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".pageviews_stats_tmp";
stmt.executeUpdate(sql); stmt.executeUpdate(sql);
sql = "DROP TABLE IF EXISTS process_portal_log_tmp;"; System.out.println("====> Dropping table process_portal_log_tmp");
sql = "DROP TABLE IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".process_portal_log_tmp";
stmt.executeUpdate(sql); stmt.executeUpdate(sql);
stmt.close(); stmt.close();
ConnectDB.getConnection().commit();
ConnectDB.getConnection().close(); ConnectDB.getConnection().close();
log.info("updateProdTables done"); log.info("updateProdTables done");

View File

@ -12,13 +12,13 @@ public class UsageStatsExporter {
static String matomoAuthToken = "703bd17d845acdaf795e01bb1e0895b9"; static String matomoAuthToken = "703bd17d845acdaf795e01bb1e0895b9";
static String matomoBaseURL = "analytics.openaire.eu"; static String matomoBaseURL = "analytics.openaire.eu";
static String repoLogPath = "/user/spyros/logs/usage_stats_logs4/Repologs"; static String repoLogPath = "/user/spyros/logs/usage_stats_logs5/Repologs";
static String portalLogPath = "/user/spyros/logs/usage_stats_logs4/Portallogs/"; static String portalLogPath = "/user/spyros/logs/usage_stats_logs5/Portallogs/";
static String portalMatomoID = "109"; static String portalMatomoID = "109";
static String irusUKBaseURL = "https://irus.jisc.ac.uk/api/sushilite/v1_7/"; static String irusUKBaseURL = "https://irus.jisc.ac.uk/api/sushilite/v1_7/";
static String irusUKReportPath = "/user/spyros/logs/usage_stats_logs4/irusUKReports"; static String irusUKReportPath = "/user/spyros/logs/usage_stats_logs5/irusUKReports";
static String sarcsReportPath = "/user/spyros/logs/usage_stats_logs4/sarcReports"; static String sarcsReportPath = "/user/spyros/logs/usage_stats_logs5/sarcReports";
public UsageStatsExporter(Properties properties) { public UsageStatsExporter(Properties properties) {
this.properties = properties; this.properties = properties;
@ -51,8 +51,10 @@ public class UsageStatsExporter {
piwikstatsdb.processLogs(); piwikstatsdb.processLogs();
log.info("process logs done"); log.info("process logs done");
// IrusStats irusstats = new IrusStats(irusUKBaseURL); System.exit(0);
// irusstats.processIrusRRReport(irusUKReportPath);
IrusStats irusstats = new IrusStats(irusUKBaseURL);
irusstats.processIrusRRReport(irusUKReportPath);
// irusstats.irusStats(); // irusstats.irusStats();
// log.info("irus done"); // log.info("irus done");