Adding sysout messages (should be transformed to log messages)

Spyros Zoupanos 2020-09-07 20:44:01 +03:00
parent e2c70f64ed
commit 719f9e3cd9
3 changed files with 38 additions and 9 deletions
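
The hunks below interleave temporary System.out.println markers with the existing log.info calls; as the title says, they should later be turned into log messages. A minimal sketch of that conversion for one of the new markers, reusing the log field these classes already call (the logging framework itself is not visible in this diff):

    // Debug output as added in this commit:
    System.out.println("====> Processing repository logs");
    processRepositoryLog();

    // Intended final form, using the class's existing logger:
    log.info("Processing repository logs");
    processRepositoryLog();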

ConnectDB.java

@@ -43,7 +43,7 @@ public abstract class ConnectDB {
 		// Class.forName(properties.getProperty("Stats_db_Driver"));
 		dbURL = "jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000/;UseNativeQuery=1";
-		usageStatsDBSchema = "usagestats_20200902";
+		usageStatsDBSchema = "usagestats_20200907";
 		statsDBSchema = "openaire_prod_stats_shadow_20200821";
 		Class.forName("org.apache.hive.jdbc.HiveDriver");
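
For context, the two lines kept above are all that standard JDBC needs to reach this warehouse; a minimal sketch of opening the connection that ConnectDB presumably wraps (getConnection() itself is not part of this diff, and the caller must declare ClassNotFoundException/SQLException):

    import java.sql.Connection;
    import java.sql.DriverManager;

    // Register the Hive JDBC driver and connect to the URL configured above.
    Class.forName("org.apache.hive.jdbc.HiveDriver");
    Connection conn = DriverManager.getConnection(
        "jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000/;UseNativeQuery=1");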

PiwikStatsDB.java

@@ -167,11 +167,19 @@ public class PiwikStatsDB {
 		ReadCounterRobotsList counterRobots = new ReadCounterRobotsList(this.getCounterRobotsURL());
 		this.robotsList = counterRobots.getRobotsPatterns();
+		System.out.println("====> Processing repository logs");
 		processRepositoryLog();
+		System.out.println("====> Repository process done");
 		log.info("repository process done");
+		System.out.println("====> Removing double clicks");
 		removeDoubleClicks();
+		System.out.println("====> Removing double clicks done");
 		log.info("removing double clicks done");
+		System.out.println("====> Cleaning oai");
 		cleanOAI();
+		System.out.println("====> Cleaning oai done");
 		log.info("cleaning oai done");
 		viewsStats();
@@ -208,11 +216,16 @@ public class PiwikStatsDB {
 		Statement stmt = ConnectDB.getConnection().createStatement();
 		ConnectDB.getConnection().setAutoCommit(false);
+		System.out.println("====> Droping piwiklogtmp_json table");
 		String drop_piwiklogtmp_json = "DROP TABLE IF EXISTS " +
 			ConnectDB.getUsageStatsDBSchema() +
 			".piwiklogtmp_json";
 		stmt.executeUpdate(drop_piwiklogtmp_json);
+		System.out.println("====> Dropped piwiklogtmp_json table");
+		System.out.println("====> Creating piwiklogtmp_json");
 		String create_piwiklogtmp_json = "CREATE EXTERNAL TABLE IF NOT EXISTS " +
 			ConnectDB.getUsageStatsDBSchema() +
 			".piwiklogtmp_json(\n" +
@@ -238,21 +251,33 @@ public class PiwikStatsDB {
 			"LOCATION '" + UsageStatsExporter.repoLogPath + "'\n" +
 			"TBLPROPERTIES (\"transactional\"=\"false\")";
 		stmt.executeUpdate(create_piwiklogtmp_json);
+		System.out.println("====> Created piwiklogtmp_json");
+		System.out.println("====> Droping piwiklogtmp table");
 		String drop_piwiklogtmp = "DROP TABLE IF EXISTS " +
 			ConnectDB.getUsageStatsDBSchema() +
 			".piwiklogtmp";
 		stmt.executeUpdate(drop_piwiklogtmp);
+		System.out.println("====> Created piwiklogtmp_json");
+		System.out.println("====> Creating piwiklogtmp");
 		String create_piwiklogtmp = "CREATE TABLE " +
 			ConnectDB.getUsageStatsDBSchema() +
 			".piwiklogtmp (source BIGINT, id_Visit STRING, country STRING, action STRING, url STRING, " +
 			"entity_id STRING, source_item_type STRING, timestamp STRING, referrer_name STRING, agent STRING) " +
 			"clustered by (source) into 100 buckets stored as orc tblproperties('transactional'='true')";
 		stmt.executeUpdate(create_piwiklogtmp);
+		System.out.println("====> Created piwiklogtmp");
+		System.out.println("====> Adding JSON Serde jar");
 		stmt.executeUpdate("add jar /usr/share/cmf/common_jars/hive-hcatalog-core-1.1.0-cdh5.14.0.jar");
+		System.out.println("====> Added JSON Serde jar");
+		System.out.println("====> Inserting into piwiklogtmp");
 		String insert_piwiklogtmp = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
 			"SELECT DISTINCT cast(idSite as BIGINT) as source, idVisit as id_Visit, country, " +
 			"actiondetail.type as action, actiondetail.url as url, " +
@@ -262,11 +287,11 @@ public class PiwikStatsDB {
 			"FROM " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp_json\n" +
 			"LATERAL VIEW explode(actiondetails) actiondetailsTable AS actiondetail";
 		stmt.executeUpdate(insert_piwiklogtmp);
+		System.out.println("====> Inserted into piwiklogtmp");
+		ConnectDB.getConnection().commit();
 		stmt.close();
+		System.exit(0);
 		// ArrayList<String> jsonFiles = listHdfsDir(this.logRepoPath);
 		//// File dir = new File(this.logRepoPath);
 		//// File[] jsonFiles = dir.listFiles();
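
For readability, this is the HiveQL that the concatenated strings above produce, with ConnectDB.getUsageStatsDBSchema() resolved to usagestats_20200907 and UsageStatsExporter.repoLogPath to /user/spyros/logs/usage_stats_logs3/Repologs (both values come from this same commit); parts the hunks do not show are marked as elided:

    DROP TABLE IF EXISTS usagestats_20200907.piwiklogtmp_json;
    CREATE EXTERNAL TABLE IF NOT EXISTS usagestats_20200907.piwiklogtmp_json(
      ... )                          -- JSON column list elided by the hunk
    LOCATION '/user/spyros/logs/usage_stats_logs3/Repologs'
    TBLPROPERTIES ("transactional"="false");

    DROP TABLE IF EXISTS usagestats_20200907.piwiklogtmp;
    CREATE TABLE usagestats_20200907.piwiklogtmp (
      source BIGINT, id_Visit STRING, country STRING, action STRING, url STRING,
      entity_id STRING, source_item_type STRING, timestamp STRING,
      referrer_name STRING, agent STRING)
    CLUSTERED BY (source) INTO 100 BUCKETS
    STORED AS ORC TBLPROPERTIES ('transactional'='true');

    ADD JAR /usr/share/cmf/common_jars/hive-hcatalog-core-1.1.0-cdh5.14.0.jar;

    INSERT INTO usagestats_20200907.piwiklogtmp
    SELECT DISTINCT cast(idSite AS BIGINT) AS source, idVisit AS id_Visit, country,
           actiondetail.type AS action, actiondetail.url AS url,
           ...                       -- remaining select list elided by the hunk
    FROM usagestats_20200907.piwiklogtmp_json
    LATERAL VIEW explode(actiondetails) actiondetailsTable AS actiondetail;

The external table reads the raw Matomo JSON in place on HDFS, and LATERAL VIEW explode flattens each log record's actiondetails array into one row per action before loading the bucketed, transactional piwiklogtmp table.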

UsageStatsExporter.java

@@ -17,13 +17,13 @@ public class UsageStatsExporter {
 	static String matomoAuthToken = "703bd17d845acdaf795e01bb1e0895b9";
 	static String matomoBaseURL = "analytics.openaire.eu";
-	static String repoLogPath = "/user/spyros/logs/usage_stats_logs2/Repologs";
-	static String portalLogPath = "/user/spyros/logs/usage_stats_logs2/Portallogs/";
+	static String repoLogPath = "/user/spyros/logs/usage_stats_logs3/Repologs";
+	static String portalLogPath = "/user/spyros/logs/usage_stats_logs3/Portallogs/";
 	static String portalMatomoID = "109";
 	static String irusUKBaseURL = "https://irus.jisc.ac.uk/api/sushilite/v1_7/";
-	static String irusUKReportPath = "/user/spyros/logs/usage_stats_logs2/irusUKReports";
-	static String sarcsReportPath = "/user/spyros/logs/usage_stats_logs2/sarcReports";
+	static String irusUKReportPath = "/user/spyros/logs/usage_stats_logs3/irusUKReports";
+	static String sarcsReportPath = "/user/spyros/logs/usage_stats_logs3/sarcReports";
 	private static final Class[] parameters = new Class[] {
 		URL.class
@@ -71,16 +71,19 @@ public class UsageStatsExporter {
 		// String portalMatomoID = properties.getProperty("portal_MatomoID");
 		// String irusUKBaseURL = properties.getProperty("IRUS_UK_BaseUrl");
-		addFile("/usr/share/cmf/common_jars/hive-hcatalog-core-1.1.0-cdh5.14.0.jar");
+		// addFile("/usr/share/cmf/common_jars/hive-hcatalog-core-1.1.0-cdh5.14.0.jar");
 		// connect to DB
+		System.out.println("====> Initialising DB properties");
 		ConnectDB.init(properties);
 		// Create DB tables - they are also needed to download the statistics too
+		System.out.println("====> Creating database and tables");
 		PiwikStatsDB piwikstatsdb = new PiwikStatsDB(repoLogPath, portalLogPath);
 		//
 		// // Download the statistics - The following 2 lines are not needed after the download - Commenting them out for
 		// // the moment
+		System.out.println("====> Initializing the download logs module");
 		PiwikDownloadLogs piwd = new PiwikDownloadLogs(matomoBaseURL, matomoAuthToken);
 		// piwd.GetOpenAIRELogs(repoLogPath, portalLogPath, portalMatomoID);
@@ -88,6 +91,7 @@ public class UsageStatsExporter {
 		// String cRobotsUrl = properties.getProperty("COUNTER_robots_Url");
 		String cRobotsUrl = "https://raw.githubusercontent.com/atmire/COUNTER-Robots/master/COUNTER_Robots_list.json";
 		piwikstatsdb.setCounterRobotsURL(cRobotsUrl);
+		System.out.println("====> Processing logs");
 		piwikstatsdb.processLogs();
 		log.info("process logs done");