From f8e91cdc5caaacb0f19d45f0aebef76e296777a6 Mon Sep 17 00:00:00 2001 From: Spyros Zoupanos Date: Sun, 13 Sep 2020 12:23:03 +0300 Subject: [PATCH] processLogs.updateProdTables. I need feedback for processLogs.portalStats to see why they never end --- .../oa/graph/usagestats/export/ConnectDB.java | 2 +- .../graph/usagestats/export/PiwikStatsDB.java | 69 ++++++++++--------- .../usagestats/export/UsageStatsExporter.java | 14 ++-- 3 files changed, 44 insertions(+), 41 deletions(-) diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/ConnectDB.java b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/ConnectDB.java index 7f89c2942f..2645c6994f 100644 --- a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/ConnectDB.java +++ b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/ConnectDB.java @@ -43,7 +43,7 @@ public abstract class ConnectDB { // Class.forName(properties.getProperty("Stats_db_Driver")); dbURL = "jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000/;UseNativeQuery=1"; - usageStatsDBSchema = "usagestats_20200907"; + usageStatsDBSchema = "usagestats_20200913"; statsDBSchema = "openaire_prod_stats_shadow_20200821"; Class.forName("org.apache.hive.jdbc.HiveDriver"); diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/PiwikStatsDB.java b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/PiwikStatsDB.java index e618e1f2ec..c0e1a3bcfc 100644 --- a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/PiwikStatsDB.java +++ b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/PiwikStatsDB.java @@ -43,7 +43,7 @@ public class PiwikStatsDB { this.createTables(); // The piwiklog table is not needed since it is built // on top of JSON files -// 
this.createTmpTables(); + this.createTmpTables(); } public void foo() { @@ -168,7 +168,7 @@ public class PiwikStatsDB { this.robotsList = counterRobots.getRobotsPatterns(); System.out.println("====> Processing repository logs"); - processRepositoryLog(); +// processRepositoryLog(); System.out.println("====> Repository logs process done"); log.info("repository process done"); @@ -196,13 +196,14 @@ public class PiwikStatsDB { log.info("portal process done"); System.out.println("====> Processing portal usagestats"); - portalStats(); + // To see why this never ends +// portalStats(); log.info("portal usagestats done"); System.out.println("====> Portal usagestats process done"); - System.exit(0); - - updateProdTables(); + System.out.println("====> Updating Production Tables"); +// updateProdTables(); + System.out.println("====> Updated Production Tables"); log.info("updateProdTables done"); } catch (Exception e) { @@ -460,7 +461,7 @@ public class PiwikStatsDB { stmt.executeUpdate(drop_views_stats); System.out.println("====> Dropped downloads_stats_tmp table"); - System.out.println("====> Creating downloads_stats_tmp view"); + System.out.println("====> Creating downloads_stats_tmp table"); sql = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".downloads_stats_tmp AS " + "SELECT 'OpenAIRE' as source, d.id as repository_id, ro.id as result_id, month as date, " + "max(downloads) AS count, max(openaire_referrer) AS openaire " + @@ -469,7 +470,7 @@ public class PiwikStatsDB { "WHERE p.source=d.piwik_id and p.id=ro.oid " + "GROUP BY d.id, ro.id, month " + "ORDER BY d.id, ro.id, month"; - System.out.println("====> Created downloads_stats_tmp view"); + System.out.println("====> Created downloads_stats_tmp table"); stmt.executeUpdate(sql); System.out.println("====> Dropping downloads_stats table"); @@ -486,15 +487,13 @@ public class PiwikStatsDB { stmt.executeUpdate(create_pageviews_stats); System.out.println("====> Created downloads_stats table"); - 
System.out.println("====> Dropping pageviews_stats table"); + System.out.println("====> Dropping result_downloads_monthly_tmp view"); sql = "DROP VIEW IF EXISTS result_downloads_monthly_tmp"; - System.out.println("====> Dropped pageviews_stats table"); + System.out.println("====> Dropped result_downloads_monthly_tmp view"); stmt.executeUpdate(sql); stmt.close(); ConnectDB.getConnection().close(); - - System.exit(0); } public void finalizeStats() throws Exception { @@ -811,7 +810,8 @@ public class PiwikStatsDB { System.out.println("====> PortalStats - Step 1"); String sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " + - "SELECT DISTINCT source, id_visit, country, action, url, roid.oid, 'oaItem', timestamp, referrer_name, agent " + + "SELECT DISTINCT source, id_visit, country, action, url, roid.oid, 'oaItem', timestamp, referrer_name, agent " + + "FROM " + ConnectDB.getUsageStatsDBSchema() + ".process_portal_log_tmp, " + ConnectDB.getStatsDBSchema() + ".result_oids roid " + "WHERE entity_id IS NOT null AND entity_id=roid.oid AND roid.oid IS NOT null"; @@ -821,7 +821,8 @@ public class PiwikStatsDB { System.out.println("====> PortalStats - Step 2"); stmt = con.createStatement(); sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " + - "SELECT DISTINCT source, id_visit, country, action, url, roid.oid, 'datasource', timestamp, referrer_name, agent " + + "SELECT DISTINCT source, id_visit, country, action, url, roid.oid, 'datasource', timestamp, referrer_name, agent " + + "FROM " + ConnectDB.getUsageStatsDBSchema() + ".process_portal_log_tmp, " + ConnectDB.getStatsDBSchema() + ".datasource_oids roid " + "WHERE entity_id IS NOT null AND entity_id=roid.oid AND roid.oid IS NOT null"; @@ -831,7 +832,8 @@ public class PiwikStatsDB { System.out.println("====> PortalStats - Step 3"); stmt = con.createStatement(); sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " + - "SELECT DISTINCT source, id_visit, 
country, action, url, roid.oid, 'organization', timestamp, referrer_name, agent " + + "SELECT DISTINCT source, id_visit, country, action, url, roid.oid, 'organization', timestamp, referrer_name, agent " + + "FROM " + ConnectDB.getUsageStatsDBSchema() + ".process_portal_log_tmp, " + ConnectDB.getStatsDBSchema() + ".datasource_oids roid " + "WHERE entity_id IS NOT null AND entity_id=roid.oid AND roid.oid IS NOT null"; @@ -841,7 +843,8 @@ public class PiwikStatsDB { System.out.println("====> PortalStats - Step 4"); stmt = con.createStatement(); sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " + - "SELECT DISTINCT source, id_visit, country, action, url, roid.oid, 'project', timestamp, referrer_name, agent " + + "SELECT DISTINCT source, id_visit, country, action, url, roid.oid, 'project', timestamp, referrer_name, agent " + + "FROM " + ConnectDB.getUsageStatsDBSchema() + ".process_portal_log_tmp, " + ConnectDB.getStatsDBSchema() + ".project_oids roid " + "WHERE entity_id IS NOT null AND entity_id=roid.oid AND roid.oid IS NOT null"; @@ -1133,45 +1136,43 @@ public class PiwikStatsDB { Statement stmt = ConnectDB.getConnection().createStatement(); ConnectDB.getConnection().setAutoCommit(false); -// String sql = "insert into piwiklog select * from piwiklogtmp;"; - String sql = - "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".piwiklog " + + System.out.println("====> Inserting data to piwiklog"); + String sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".piwiklog " + "SELECT * FROM " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp"; stmt.executeUpdate(sql); -// sql = "insert into views_stats select * from views_stats_tmp;"; - sql = - "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".views_stats " + + System.out.println("====> Inserting data to views_stats"); + sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".views_stats " + "SELECT * FROM " + ConnectDB.getUsageStatsDBSchema() + ".views_stats_tmp"; 
stmt.executeUpdate(sql); -// sql = "insert into downloads_stats select * from downloads_stats_tmp;"; - sql = - "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".downloads_stats " + + System.out.println("====> Inserting data to downloads_stats"); + sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".downloads_stats " + "SELECT * FROM " + ConnectDB.getUsageStatsDBSchema() + ".downloads_stats_tmp"; stmt.executeUpdate(sql); -// sql = "insert into pageviews_stats select * from pageviews_stats_tmp;"; - sql = - "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".pageviews_stats " + + System.out.println("====> Inserting data to pageviews_stats"); + sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".pageviews_stats " + "SELECT * FROM " + ConnectDB.getUsageStatsDBSchema() + ".pageviews_stats_tmp"; stmt.executeUpdate(sql); -// sql = "DROP TABLE IF EXISTS views_stats_tmp;"; - sql = ""; + System.out.println("====> Dropping table views_stats_tmp"); + sql = "DROP TABLE IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".views_stats_tmp"; stmt.executeUpdate(sql); - sql = "DROP TABLE IF EXISTS downloads_stats_tmp;"; + System.out.println("====> Dropping table downloads_stats_tmp"); + sql = "DROP TABLE IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".downloads_stats_tmp"; stmt.executeUpdate(sql); - sql = "DROP TABLE IF EXISTS pageviews_stats_tmp;"; + System.out.println("====> Dropping table pageviews_stats_tmp"); + sql = "DROP TABLE IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".pageviews_stats_tmp"; stmt.executeUpdate(sql); - sql = "DROP TABLE IF EXISTS process_portal_log_tmp;"; + System.out.println("====> Dropping table process_portal_log_tmp"); + sql = "DROP TABLE IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".process_portal_log_tmp"; stmt.executeUpdate(sql); stmt.close(); - ConnectDB.getConnection().commit(); ConnectDB.getConnection().close(); log.info("updateProdTables done"); diff --git 
a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/UsageStatsExporter.java b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/UsageStatsExporter.java index 2d66093c08..e6586c98dc 100644 --- a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/UsageStatsExporter.java +++ b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/UsageStatsExporter.java @@ -12,13 +12,13 @@ public class UsageStatsExporter { static String matomoAuthToken = "703bd17d845acdaf795e01bb1e0895b9"; static String matomoBaseURL = "analytics.openaire.eu"; - static String repoLogPath = "/user/spyros/logs/usage_stats_logs4/Repologs"; - static String portalLogPath = "/user/spyros/logs/usage_stats_logs4/Portallogs/"; + static String repoLogPath = "/user/spyros/logs/usage_stats_logs5/Repologs"; + static String portalLogPath = "/user/spyros/logs/usage_stats_logs5/Portallogs/"; static String portalMatomoID = "109"; static String irusUKBaseURL = "https://irus.jisc.ac.uk/api/sushilite/v1_7/"; - static String irusUKReportPath = "/user/spyros/logs/usage_stats_logs4/irusUKReports"; - static String sarcsReportPath = "/user/spyros/logs/usage_stats_logs4/sarcReports"; + static String irusUKReportPath = "/user/spyros/logs/usage_stats_logs5/irusUKReports"; + static String sarcsReportPath = "/user/spyros/logs/usage_stats_logs5/sarcReports"; public UsageStatsExporter(Properties properties) { this.properties = properties; @@ -51,8 +51,10 @@ public class UsageStatsExporter { piwikstatsdb.processLogs(); log.info("process logs done"); -// IrusStats irusstats = new IrusStats(irusUKBaseURL); -// irusstats.processIrusRRReport(irusUKReportPath); + System.exit(0); + + IrusStats irusstats = new IrusStats(irusUKBaseURL); + irusstats.processIrusRRReport(irusUKReportPath); // irusstats.irusStats(); // log.info("irus done");