From 719f9e3cd9a607a2ad6e0047a2ff7e6448e4c89a Mon Sep 17 00:00:00 2001 From: Spyros Zoupanos Date: Mon, 7 Sep 2020 20:44:01 +0300 Subject: [PATCH] Adding systout messages (should be transformed to log messages) --- .../oa/graph/usagestats/export/ConnectDB.java | 2 +- .../graph/usagestats/export/PiwikStatsDB.java | 31 +++++++++++++++++-- .../usagestats/export/UsageStatsExporter.java | 14 ++++++--- 3 files changed, 38 insertions(+), 9 deletions(-) diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/ConnectDB.java b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/ConnectDB.java index 23a8d85f7b..7f89c2942f 100644 --- a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/ConnectDB.java +++ b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/ConnectDB.java @@ -43,7 +43,7 @@ public abstract class ConnectDB { // Class.forName(properties.getProperty("Stats_db_Driver")); dbURL = "jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000/;UseNativeQuery=1"; - usageStatsDBSchema = "usagestats_20200902"; + usageStatsDBSchema = "usagestats_20200907"; statsDBSchema = "openaire_prod_stats_shadow_20200821"; Class.forName("org.apache.hive.jdbc.HiveDriver"); diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/PiwikStatsDB.java b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/PiwikStatsDB.java index 208c16df90..17ca03d045 100644 --- a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/PiwikStatsDB.java +++ b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/PiwikStatsDB.java @@ -167,11 +167,19 @@ public class PiwikStatsDB { ReadCounterRobotsList counterRobots = new ReadCounterRobotsList(this.getCounterRobotsURL()); this.robotsList = 
counterRobots.getRobotsPatterns(); + System.out.println("====> Processing repository logs"); processRepositoryLog(); + System.out.println("====> Repository process done"); log.info("repository process done"); + + System.out.println("====> Removing double clicks"); removeDoubleClicks(); + System.out.println("====> Removing double clicks done"); log.info("removing double clicks done"); + + System.out.println("====> Cleaning oai"); cleanOAI(); + System.out.println("====> Cleaning oai done"); log.info("cleaning oai done"); viewsStats(); @@ -208,11 +216,16 @@ public class PiwikStatsDB { Statement stmt = ConnectDB.getConnection().createStatement(); ConnectDB.getConnection().setAutoCommit(false); + + System.out.println("====> Dropping piwiklogtmp_json table"); String drop_piwiklogtmp_json = "DROP TABLE IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp_json"; stmt.executeUpdate(drop_piwiklogtmp_json); + System.out.println("====> Dropped piwiklogtmp_json table"); + + System.out.println("====> Creating piwiklogtmp_json"); String create_piwiklogtmp_json = "CREATE EXTERNAL TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp_json(\n" + @@ -238,21 +251,33 @@ public class PiwikStatsDB { "LOCATION '" + UsageStatsExporter.repoLogPath + "'\n" + "TBLPROPERTIES (\"transactional\"=\"false\")"; stmt.executeUpdate(create_piwiklogtmp_json); + System.out.println("====> Created piwiklogtmp_json"); + + System.out.println("====> Dropping piwiklogtmp table"); String drop_piwiklogtmp = "DROP TABLE IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp"; stmt.executeUpdate(drop_piwiklogtmp); + System.out.println("====> Dropped piwiklogtmp table"); + + System.out.println("====> Creating piwiklogtmp"); String create_piwiklogtmp = "CREATE TABLE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp (source BIGINT, id_Visit STRING, country STRING, action STRING, url STRING, " + "entity_id STRING, source_item_type STRING, timestamp STRING, referrer_name 
STRING, agent STRING) " + "clustered by (source) into 100 buckets stored as orc tblproperties('transactional'='true')"; stmt.executeUpdate(create_piwiklogtmp); - + System.out.println("====> Created piwiklogtmp"); + + + System.out.println("====> Adding JSON Serde jar"); stmt.executeUpdate("add jar /usr/share/cmf/common_jars/hive-hcatalog-core-1.1.0-cdh5.14.0.jar"); + System.out.println("====> Added JSON Serde jar"); + + System.out.println("====> Inserting into piwiklogtmp"); String insert_piwiklogtmp = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " + "SELECT DISTINCT cast(idSite as BIGINT) as source, idVisit as id_Visit, country, " + "actiondetail.type as action, actiondetail.url as url, " + @@ -262,11 +287,11 @@ public class PiwikStatsDB { "FROM " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp_json\n" + "LATERAL VIEW explode(actiondetails) actiondetailsTable AS actiondetail"; stmt.executeUpdate(insert_piwiklogtmp); + System.out.println("====> Inserted into piwiklogtmp"); + ConnectDB.getConnection().commit(); stmt.close(); - System.exit(0); - // ArrayList jsonFiles = listHdfsDir(this.logRepoPath); //// File dir = new File(this.logRepoPath); //// File[] jsonFiles = dir.listFiles(); diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/UsageStatsExporter.java b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/UsageStatsExporter.java index b48a5e2921..baf475d25d 100644 --- a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/UsageStatsExporter.java +++ b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/UsageStatsExporter.java @@ -17,13 +17,13 @@ public class UsageStatsExporter { static String matomoAuthToken = "703bd17d845acdaf795e01bb1e0895b9"; static String matomoBaseURL = "analytics.openaire.eu"; - static String repoLogPath = "/user/spyros/logs/usage_stats_logs2/Repologs"; - 
static String portalLogPath = "/user/spyros/logs/usage_stats_logs2/Portallogs/"; + static String repoLogPath = "/user/spyros/logs/usage_stats_logs3/Repologs"; + static String portalLogPath = "/user/spyros/logs/usage_stats_logs3/Portallogs/"; static String portalMatomoID = "109"; static String irusUKBaseURL = "https://irus.jisc.ac.uk/api/sushilite/v1_7/"; - static String irusUKReportPath = "/user/spyros/logs/usage_stats_logs2/irusUKReports"; - static String sarcsReportPath = "/user/spyros/logs/usage_stats_logs2/sarcReports"; + static String irusUKReportPath = "/user/spyros/logs/usage_stats_logs3/irusUKReports"; + static String sarcsReportPath = "/user/spyros/logs/usage_stats_logs3/sarcReports"; private static final Class[] parameters = new Class[] { URL.class @@ -71,16 +71,19 @@ public class UsageStatsExporter { // String portalMatomoID = properties.getProperty("portal_MatomoID"); // String irusUKBaseURL = properties.getProperty("IRUS_UK_BaseUrl"); - addFile("/usr/share/cmf/common_jars/hive-hcatalog-core-1.1.0-cdh5.14.0.jar"); +// addFile("/usr/share/cmf/common_jars/hive-hcatalog-core-1.1.0-cdh5.14.0.jar"); // connect to DB + System.out.println("====> Initialising DB properties"); ConnectDB.init(properties); // Create DB tables - they are also needed to download the statistics too + System.out.println("====> Creating database and tables"); PiwikStatsDB piwikstatsdb = new PiwikStatsDB(repoLogPath, portalLogPath); // // // Download the statistics - The following 2 lines are not needed after the download - Commenting them out for // // the moment + System.out.println("====> Initializing the download logs module"); PiwikDownloadLogs piwd = new PiwikDownloadLogs(matomoBaseURL, matomoAuthToken); // piwd.GetOpenAIRELogs(repoLogPath, portalLogPath, portalMatomoID); @@ -88,6 +91,7 @@ public class UsageStatsExporter { // String cRobotsUrl = properties.getProperty("COUNTER_robots_Url"); String cRobotsUrl = 
"https://raw.githubusercontent.com/atmire/COUNTER-Robots/master/COUNTER_Robots_list.json"; piwikstatsdb.setCounterRobotsURL(cRobotsUrl); + System.out.println("====> Processing logs"); piwikstatsdb.processLogs(); log.info("process logs done");