forked from D-Net/dnet-hadoop
processLogs.updateProdTables. I need feedback for processLogs.portalStats to see why they never end
This commit is contained in:
parent
9caac3e3e3
commit
f8e91cdc5c
|
@ -43,7 +43,7 @@ public abstract class ConnectDB {
|
|||
// Class.forName(properties.getProperty("Stats_db_Driver"));
|
||||
|
||||
dbURL = "jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000/;UseNativeQuery=1";
|
||||
usageStatsDBSchema = "usagestats_20200907";
|
||||
usageStatsDBSchema = "usagestats_20200913";
|
||||
statsDBSchema = "openaire_prod_stats_shadow_20200821";
|
||||
|
||||
Class.forName("org.apache.hive.jdbc.HiveDriver");
|
||||
|
|
|
@ -43,7 +43,7 @@ public class PiwikStatsDB {
|
|||
this.createTables();
|
||||
// The piwiklog table is not needed since it is built
|
||||
// on top of JSON files
|
||||
// this.createTmpTables();
|
||||
this.createTmpTables();
|
||||
}
|
||||
|
||||
public void foo() {
|
||||
|
@ -168,7 +168,7 @@ public class PiwikStatsDB {
|
|||
this.robotsList = counterRobots.getRobotsPatterns();
|
||||
|
||||
System.out.println("====> Processing repository logs");
|
||||
processRepositoryLog();
|
||||
// processRepositoryLog();
|
||||
System.out.println("====> Repository logs process done");
|
||||
log.info("repository process done");
|
||||
|
||||
|
@ -196,13 +196,14 @@ public class PiwikStatsDB {
|
|||
log.info("portal process done");
|
||||
|
||||
System.out.println("====> Processing portal usagestats");
|
||||
portalStats();
|
||||
// To see why this never ends
|
||||
// portalStats();
|
||||
log.info("portal usagestats done");
|
||||
System.out.println("====> Portal usagestats process done");
|
||||
|
||||
System.exit(0);
|
||||
|
||||
updateProdTables();
|
||||
System.out.println("====> Updating Production Tables");
|
||||
// updateProdTables();
|
||||
System.out.println("====> Updated Production Tables");
|
||||
log.info("updateProdTables done");
|
||||
|
||||
} catch (Exception e) {
|
||||
|
@ -460,7 +461,7 @@ public class PiwikStatsDB {
|
|||
stmt.executeUpdate(drop_views_stats);
|
||||
System.out.println("====> Dropped downloads_stats_tmp table");
|
||||
|
||||
System.out.println("====> Creating downloads_stats_tmp view");
|
||||
System.out.println("====> Creating downloads_stats_tmp table");
|
||||
sql = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".downloads_stats_tmp AS " +
|
||||
"SELECT 'OpenAIRE' as source, d.id as repository_id, ro.id as result_id, month as date, " +
|
||||
"max(downloads) AS count, max(openaire_referrer) AS openaire " +
|
||||
|
@ -469,7 +470,7 @@ public class PiwikStatsDB {
|
|||
"WHERE p.source=d.piwik_id and p.id=ro.oid " +
|
||||
"GROUP BY d.id, ro.id, month " +
|
||||
"ORDER BY d.id, ro.id, month";
|
||||
System.out.println("====> Created downloads_stats_tmp view");
|
||||
System.out.println("====> Created downloads_stats_tmp table");
|
||||
stmt.executeUpdate(sql);
|
||||
|
||||
System.out.println("====> Dropping downloads_stats table");
|
||||
|
@ -486,15 +487,13 @@ public class PiwikStatsDB {
|
|||
stmt.executeUpdate(create_pageviews_stats);
|
||||
System.out.println("====> Created downloads_stats table");
|
||||
|
||||
System.out.println("====> Dropping pageviews_stats table");
|
||||
System.out.println("====> Dropping result_downloads_monthly_tmp view");
|
||||
sql = "DROP VIEW IF EXISTS result_downloads_monthly_tmp";
|
||||
System.out.println("====> Dropped pageviews_stats table");
|
||||
System.out.println("====> Dropped result_downloads_monthly_tmp view");
|
||||
stmt.executeUpdate(sql);
|
||||
|
||||
stmt.close();
|
||||
ConnectDB.getConnection().close();
|
||||
|
||||
System.exit(0);
|
||||
}
|
||||
|
||||
public void finalizeStats() throws Exception {
|
||||
|
@ -811,7 +810,8 @@ public class PiwikStatsDB {
|
|||
|
||||
System.out.println("====> PortalStats - Step 1");
|
||||
String sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
|
||||
"SELECT DISTINCT source, id_visit, country, action, url, roid.oid, 'oaItem', timestamp, referrer_name, agent " +
|
||||
"SELECT DISTINCT source, id_visit, country, action, url, roid.oid, 'oaItem', timestamp, referrer_name, agent "
|
||||
+
|
||||
"FROM " + ConnectDB.getUsageStatsDBSchema() + ".process_portal_log_tmp, " +
|
||||
ConnectDB.getStatsDBSchema() + ".result_oids roid " +
|
||||
"WHERE entity_id IS NOT null AND entity_id=roid.oid AND roid.oid IS NOT null";
|
||||
|
@ -821,7 +821,8 @@ public class PiwikStatsDB {
|
|||
System.out.println("====> PortalStats - Step 2");
|
||||
stmt = con.createStatement();
|
||||
sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
|
||||
"SELECT DISTINCT source, id_visit, country, action, url, roid.oid, 'datasource', timestamp, referrer_name, agent " +
|
||||
"SELECT DISTINCT source, id_visit, country, action, url, roid.oid, 'datasource', timestamp, referrer_name, agent "
|
||||
+
|
||||
"FROM " + ConnectDB.getUsageStatsDBSchema() + ".process_portal_log_tmp, " +
|
||||
ConnectDB.getStatsDBSchema() + ".datasource_oids roid " +
|
||||
"WHERE entity_id IS NOT null AND entity_id=roid.oid AND roid.oid IS NOT null";
|
||||
|
@ -831,7 +832,8 @@ public class PiwikStatsDB {
|
|||
System.out.println("====> PortalStats - Step 3");
|
||||
stmt = con.createStatement();
|
||||
sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
|
||||
"SELECT DISTINCT source, id_visit, country, action, url, roid.oid, 'organization', timestamp, referrer_name, agent " +
|
||||
"SELECT DISTINCT source, id_visit, country, action, url, roid.oid, 'organization', timestamp, referrer_name, agent "
|
||||
+
|
||||
"FROM " + ConnectDB.getUsageStatsDBSchema() + ".process_portal_log_tmp, " +
|
||||
ConnectDB.getStatsDBSchema() + ".datasource_oids roid " +
|
||||
"WHERE entity_id IS NOT null AND entity_id=roid.oid AND roid.oid IS NOT null";
|
||||
|
@ -841,7 +843,8 @@ public class PiwikStatsDB {
|
|||
System.out.println("====> PortalStats - Step 4");
|
||||
stmt = con.createStatement();
|
||||
sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
|
||||
"SELECT DISTINCT source, id_visit, country, action, url, roid.oid, 'project', timestamp, referrer_name, agent " +
|
||||
"SELECT DISTINCT source, id_visit, country, action, url, roid.oid, 'project', timestamp, referrer_name, agent "
|
||||
+
|
||||
"FROM " + ConnectDB.getUsageStatsDBSchema() + ".process_portal_log_tmp, " +
|
||||
ConnectDB.getStatsDBSchema() + ".project_oids roid " +
|
||||
"WHERE entity_id IS NOT null AND entity_id=roid.oid AND roid.oid IS NOT null";
|
||||
|
@ -1133,45 +1136,43 @@ public class PiwikStatsDB {
|
|||
Statement stmt = ConnectDB.getConnection().createStatement();
|
||||
ConnectDB.getConnection().setAutoCommit(false);
|
||||
|
||||
// String sql = "insert into piwiklog select * from piwiklogtmp;";
|
||||
String sql =
|
||||
"INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".piwiklog " +
|
||||
System.out.println("====> Inserting data to piwiklog");
|
||||
String sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".piwiklog " +
|
||||
"SELECT * FROM " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp";
|
||||
stmt.executeUpdate(sql);
|
||||
|
||||
// sql = "insert into views_stats select * from views_stats_tmp;";
|
||||
sql =
|
||||
"INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".views_stats " +
|
||||
System.out.println("====> Inserting data to views_stats");
|
||||
sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".views_stats " +
|
||||
"SELECT * FROM " + ConnectDB.getUsageStatsDBSchema() + ".views_stats_tmp";
|
||||
stmt.executeUpdate(sql);
|
||||
|
||||
// sql = "insert into downloads_stats select * from downloads_stats_tmp;";
|
||||
sql =
|
||||
"INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".downloads_stats " +
|
||||
System.out.println("====> Inserting data to downloads_stats");
|
||||
sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".downloads_stats " +
|
||||
"SELECT * FROM " + ConnectDB.getUsageStatsDBSchema() + ".downloads_stats_tmp";
|
||||
stmt.executeUpdate(sql);
|
||||
|
||||
// sql = "insert into pageviews_stats select * from pageviews_stats_tmp;";
|
||||
sql =
|
||||
"INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".pageviews_stats " +
|
||||
System.out.println("====> Inserting data to pageviews_stats");
|
||||
sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".pageviews_stats " +
|
||||
"SELECT * FROM " + ConnectDB.getUsageStatsDBSchema() + ".pageviews_stats_tmp";
|
||||
stmt.executeUpdate(sql);
|
||||
|
||||
// sql = "DROP TABLE IF EXISTS views_stats_tmp;";
|
||||
sql = "";
|
||||
System.out.println("====> Dropping table views_stats_tmp");
|
||||
sql = "DROP TABLE IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".views_stats_tmp";
|
||||
stmt.executeUpdate(sql);
|
||||
|
||||
sql = "DROP TABLE IF EXISTS downloads_stats_tmp;";
|
||||
System.out.println("====> Dropping table downloads_stats_tmp");
|
||||
sql = "DROP TABLE IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".downloads_stats_tmp";
|
||||
stmt.executeUpdate(sql);
|
||||
|
||||
sql = "DROP TABLE IF EXISTS pageviews_stats_tmp;";
|
||||
System.out.println("====> Dropping table pageviews_stats_tmp");
|
||||
sql = "DROP TABLE IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".pageviews_stats_tmp";
|
||||
stmt.executeUpdate(sql);
|
||||
|
||||
sql = "DROP TABLE IF EXISTS process_portal_log_tmp;";
|
||||
System.out.println("====> Dropping table process_portal_log_tmp");
|
||||
sql = "DROP TABLE IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".process_portal_log_tmp";
|
||||
stmt.executeUpdate(sql);
|
||||
|
||||
stmt.close();
|
||||
ConnectDB.getConnection().commit();
|
||||
ConnectDB.getConnection().close();
|
||||
|
||||
log.info("updateProdTables done");
|
||||
|
|
|
@ -12,13 +12,13 @@ public class UsageStatsExporter {
|
|||
|
||||
static String matomoAuthToken = "703bd17d845acdaf795e01bb1e0895b9";
|
||||
static String matomoBaseURL = "analytics.openaire.eu";
|
||||
static String repoLogPath = "/user/spyros/logs/usage_stats_logs4/Repologs";
|
||||
static String portalLogPath = "/user/spyros/logs/usage_stats_logs4/Portallogs/";
|
||||
static String repoLogPath = "/user/spyros/logs/usage_stats_logs5/Repologs";
|
||||
static String portalLogPath = "/user/spyros/logs/usage_stats_logs5/Portallogs/";
|
||||
static String portalMatomoID = "109";
|
||||
static String irusUKBaseURL = "https://irus.jisc.ac.uk/api/sushilite/v1_7/";
|
||||
|
||||
static String irusUKReportPath = "/user/spyros/logs/usage_stats_logs4/irusUKReports";
|
||||
static String sarcsReportPath = "/user/spyros/logs/usage_stats_logs4/sarcReports";
|
||||
static String irusUKReportPath = "/user/spyros/logs/usage_stats_logs5/irusUKReports";
|
||||
static String sarcsReportPath = "/user/spyros/logs/usage_stats_logs5/sarcReports";
|
||||
|
||||
public UsageStatsExporter(Properties properties) {
|
||||
this.properties = properties;
|
||||
|
@ -51,8 +51,10 @@ public class UsageStatsExporter {
|
|||
piwikstatsdb.processLogs();
|
||||
log.info("process logs done");
|
||||
|
||||
// IrusStats irusstats = new IrusStats(irusUKBaseURL);
|
||||
// irusstats.processIrusRRReport(irusUKReportPath);
|
||||
System.exit(0);
|
||||
|
||||
IrusStats irusstats = new IrusStats(irusUKBaseURL);
|
||||
irusstats.processIrusRRReport(irusUKReportPath);
|
||||
|
||||
// irusstats.irusStats();
|
||||
// log.info("irus done");
|
||||
|
|
Loading…
Reference in New Issue