forked from D-Net/dnet-hadoop
Adding System.out messages (should be transformed to log messages)
This commit is contained in:
parent
e2c70f64ed
commit
719f9e3cd9
|
@ -43,7 +43,7 @@ public abstract class ConnectDB {
|
|||
// Class.forName(properties.getProperty("Stats_db_Driver"));
|
||||
|
||||
dbURL = "jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000/;UseNativeQuery=1";
|
||||
usageStatsDBSchema = "usagestats_20200902";
|
||||
usageStatsDBSchema = "usagestats_20200907";
|
||||
statsDBSchema = "openaire_prod_stats_shadow_20200821";
|
||||
|
||||
Class.forName("org.apache.hive.jdbc.HiveDriver");
|
||||
|
|
|
@ -167,11 +167,19 @@ public class PiwikStatsDB {
|
|||
ReadCounterRobotsList counterRobots = new ReadCounterRobotsList(this.getCounterRobotsURL());
|
||||
this.robotsList = counterRobots.getRobotsPatterns();
|
||||
|
||||
System.out.println("====> Processing repository logs");
|
||||
processRepositoryLog();
|
||||
System.out.println("====> Repository process done");
|
||||
log.info("repository process done");
|
||||
|
||||
System.out.println("====> Removing double clicks");
|
||||
removeDoubleClicks();
|
||||
System.out.println("====> Removing double clicks done");
|
||||
log.info("removing double clicks done");
|
||||
|
||||
System.out.println("====> Cleaning oai");
|
||||
cleanOAI();
|
||||
System.out.println("====> Cleaning oai done");
|
||||
log.info("cleaning oai done");
|
||||
|
||||
viewsStats();
|
||||
|
@ -208,11 +216,16 @@ public class PiwikStatsDB {
|
|||
Statement stmt = ConnectDB.getConnection().createStatement();
|
||||
ConnectDB.getConnection().setAutoCommit(false);
|
||||
|
||||
|
||||
System.out.println("====> Droping piwiklogtmp_json table");
|
||||
String drop_piwiklogtmp_json = "DROP TABLE IF EXISTS " +
|
||||
ConnectDB.getUsageStatsDBSchema() +
|
||||
".piwiklogtmp_json";
|
||||
stmt.executeUpdate(drop_piwiklogtmp_json);
|
||||
System.out.println("====> Dropped piwiklogtmp_json table");
|
||||
|
||||
|
||||
System.out.println("====> Creating piwiklogtmp_json");
|
||||
String create_piwiklogtmp_json = "CREATE EXTERNAL TABLE IF NOT EXISTS " +
|
||||
ConnectDB.getUsageStatsDBSchema() +
|
||||
".piwiklogtmp_json(\n" +
|
||||
|
@ -238,21 +251,33 @@ public class PiwikStatsDB {
|
|||
"LOCATION '" + UsageStatsExporter.repoLogPath + "'\n" +
|
||||
"TBLPROPERTIES (\"transactional\"=\"false\")";
|
||||
stmt.executeUpdate(create_piwiklogtmp_json);
|
||||
System.out.println("====> Created piwiklogtmp_json");
|
||||
|
||||
|
||||
System.out.println("====> Droping piwiklogtmp table");
|
||||
String drop_piwiklogtmp = "DROP TABLE IF EXISTS " +
|
||||
ConnectDB.getUsageStatsDBSchema() +
|
||||
".piwiklogtmp";
|
||||
stmt.executeUpdate(drop_piwiklogtmp);
|
||||
System.out.println("====> Created piwiklogtmp_json");
|
||||
|
||||
|
||||
System.out.println("====> Creating piwiklogtmp");
|
||||
String create_piwiklogtmp = "CREATE TABLE " +
|
||||
ConnectDB.getUsageStatsDBSchema() +
|
||||
".piwiklogtmp (source BIGINT, id_Visit STRING, country STRING, action STRING, url STRING, " +
|
||||
"entity_id STRING, source_item_type STRING, timestamp STRING, referrer_name STRING, agent STRING) " +
|
||||
"clustered by (source) into 100 buckets stored as orc tblproperties('transactional'='true')";
|
||||
stmt.executeUpdate(create_piwiklogtmp);
|
||||
|
||||
System.out.println("====> Created piwiklogtmp");
|
||||
|
||||
|
||||
System.out.println("====> Adding JSON Serde jar");
|
||||
stmt.executeUpdate("add jar /usr/share/cmf/common_jars/hive-hcatalog-core-1.1.0-cdh5.14.0.jar");
|
||||
System.out.println("====> Added JSON Serde jar");
|
||||
|
||||
|
||||
System.out.println("====> Inserting into piwiklogtmp");
|
||||
String insert_piwiklogtmp = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
|
||||
"SELECT DISTINCT cast(idSite as BIGINT) as source, idVisit as id_Visit, country, " +
|
||||
"actiondetail.type as action, actiondetail.url as url, " +
|
||||
|
@ -262,11 +287,11 @@ public class PiwikStatsDB {
|
|||
"FROM " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp_json\n" +
|
||||
"LATERAL VIEW explode(actiondetails) actiondetailsTable AS actiondetail";
|
||||
stmt.executeUpdate(insert_piwiklogtmp);
|
||||
System.out.println("====> Inserted into piwiklogtmp");
|
||||
|
||||
ConnectDB.getConnection().commit();
|
||||
stmt.close();
|
||||
|
||||
System.exit(0);
|
||||
|
||||
// ArrayList<String> jsonFiles = listHdfsDir(this.logRepoPath);
|
||||
//// File dir = new File(this.logRepoPath);
|
||||
//// File[] jsonFiles = dir.listFiles();
|
||||
|
|
|
@ -17,13 +17,13 @@ public class UsageStatsExporter {
|
|||
|
||||
static String matomoAuthToken = "703bd17d845acdaf795e01bb1e0895b9";
|
||||
static String matomoBaseURL = "analytics.openaire.eu";
|
||||
static String repoLogPath = "/user/spyros/logs/usage_stats_logs2/Repologs";
|
||||
static String portalLogPath = "/user/spyros/logs/usage_stats_logs2/Portallogs/";
|
||||
static String repoLogPath = "/user/spyros/logs/usage_stats_logs3/Repologs";
|
||||
static String portalLogPath = "/user/spyros/logs/usage_stats_logs3/Portallogs/";
|
||||
static String portalMatomoID = "109";
|
||||
static String irusUKBaseURL = "https://irus.jisc.ac.uk/api/sushilite/v1_7/";
|
||||
|
||||
static String irusUKReportPath = "/user/spyros/logs/usage_stats_logs2/irusUKReports";
|
||||
static String sarcsReportPath = "/user/spyros/logs/usage_stats_logs2/sarcReports";
|
||||
static String irusUKReportPath = "/user/spyros/logs/usage_stats_logs3/irusUKReports";
|
||||
static String sarcsReportPath = "/user/spyros/logs/usage_stats_logs3/sarcReports";
|
||||
|
||||
private static final Class[] parameters = new Class[] {
|
||||
URL.class
|
||||
|
@ -71,16 +71,19 @@ public class UsageStatsExporter {
|
|||
// String portalMatomoID = properties.getProperty("portal_MatomoID");
|
||||
// String irusUKBaseURL = properties.getProperty("IRUS_UK_BaseUrl");
|
||||
|
||||
addFile("/usr/share/cmf/common_jars/hive-hcatalog-core-1.1.0-cdh5.14.0.jar");
|
||||
// addFile("/usr/share/cmf/common_jars/hive-hcatalog-core-1.1.0-cdh5.14.0.jar");
|
||||
|
||||
// connect to DB
|
||||
System.out.println("====> Initialising DB properties");
|
||||
ConnectDB.init(properties);
|
||||
|
||||
// Create DB tables - they are also needed to download the statistics too
|
||||
System.out.println("====> Creating database and tables");
|
||||
PiwikStatsDB piwikstatsdb = new PiwikStatsDB(repoLogPath, portalLogPath);
|
||||
//
|
||||
// // Download the statistics - The following 2 lines are not needed after the download - Commenting them out for
|
||||
// // the moment
|
||||
System.out.println("====> Initializing the download logs module");
|
||||
PiwikDownloadLogs piwd = new PiwikDownloadLogs(matomoBaseURL, matomoAuthToken);
|
||||
// piwd.GetOpenAIRELogs(repoLogPath, portalLogPath, portalMatomoID);
|
||||
|
||||
|
@ -88,6 +91,7 @@ public class UsageStatsExporter {
|
|||
// String cRobotsUrl = properties.getProperty("COUNTER_robots_Url");
|
||||
String cRobotsUrl = "https://raw.githubusercontent.com/atmire/COUNTER-Robots/master/COUNTER_Robots_list.json";
|
||||
piwikstatsdb.setCounterRobotsURL(cRobotsUrl);
|
||||
System.out.println("====> Processing logs");
|
||||
piwikstatsdb.processLogs();
|
||||
log.info("process logs done");
|
||||
|
||||
|
|
Loading…
Reference in New Issue