forked from D-Net/dnet-hadoop
processIrusStats done
This commit is contained in:
parent 3d5904fb41
commit 08a102a76c
@@ -83,38 +83,108 @@ public class IrusStats {
 }
 }

-// The following may not be needed - It will be created when JSON tables are created
-private void createTmpTables() throws Exception {
-try {
-
-Statement stmt = ConnectDB.getConnection().createStatement();
-String sqlCreateTableSushiLog = "CREATE TABLE IF NOT EXISTS sushilogtmp(source TEXT, repository TEXT, rid TEXT, date TEXT, metric_type TEXT, count INT, PRIMARY KEY(source, repository, rid, date, metric_type));";
-stmt.executeUpdate(sqlCreateTableSushiLog);
-
-// stmt.executeUpdate("CREATE TABLE IF NOT EXISTS public.sushilog AS TABLE sushilog;");
-// String sqlCopyPublicSushiLog = "INSERT INTO sushilog SELECT * FROM public.sushilog;";
-// stmt.executeUpdate(sqlCopyPublicSushiLog);
-String sqlcreateRuleSushiLog = "CREATE OR REPLACE RULE ignore_duplicate_inserts AS "
-+ " ON INSERT TO sushilogtmp "
-+ " WHERE (EXISTS ( SELECT sushilogtmp.source, sushilogtmp.repository,"
-+ "sushilogtmp.rid, sushilogtmp.date "
-+ "FROM sushilogtmp "
-+ "WHERE sushilogtmp.source = new.source AND sushilogtmp.repository = new.repository AND sushilogtmp.rid = new.rid AND sushilogtmp.date = new.date AND sushilogtmp.metric_type = new.metric_type)) DO INSTEAD NOTHING;";
-stmt.executeUpdate(sqlcreateRuleSushiLog);
-
-stmt.close();
-ConnectDB.getConnection().close();
-log.info("Sushi Tmp Tables Created");
-} catch (Exception e) {
-log.error("Failed to create tables: " + e);
-throw new Exception("Failed to create tables: " + e.toString(), e);
-}
-}
-
-public void irusStats() throws Exception {
+// // The following may not be needed - It will be created when JSON tables are created
+// private void createTmpTables() throws Exception {
+// try {
+//
+// Statement stmt = ConnectDB.getConnection().createStatement();
+// String sqlCreateTableSushiLog = "CREATE TABLE IF NOT EXISTS sushilogtmp(source TEXT, repository TEXT, rid TEXT, date TEXT, metric_type TEXT, count INT, PRIMARY KEY(source, repository, rid, date, metric_type));";
+// stmt.executeUpdate(sqlCreateTableSushiLog);
+//
+// // stmt.executeUpdate("CREATE TABLE IF NOT EXISTS public.sushilog AS TABLE sushilog;");
+// // String sqlCopyPublicSushiLog = "INSERT INTO sushilog SELECT * FROM public.sushilog;";
+// // stmt.executeUpdate(sqlCopyPublicSushiLog);
+// String sqlcreateRuleSushiLog = "CREATE OR REPLACE RULE ignore_duplicate_inserts AS "
+// + " ON INSERT TO sushilogtmp "
+// + " WHERE (EXISTS ( SELECT sushilogtmp.source, sushilogtmp.repository,"
+// + "sushilogtmp.rid, sushilogtmp.date "
+// + "FROM sushilogtmp "
+// + "WHERE sushilogtmp.source = new.source AND sushilogtmp.repository = new.repository AND sushilogtmp.rid = new.rid AND sushilogtmp.date = new.date AND sushilogtmp.metric_type = new.metric_type)) DO INSTEAD NOTHING;";
+// stmt.executeUpdate(sqlcreateRuleSushiLog);
+//
+// stmt.close();
+// ConnectDB.getConnection().close();
+// log.info("Sushi Tmp Tables Created");
+// } catch (Exception e) {
+// log.error("Failed to create tables: " + e);
+// throw new Exception("Failed to create tables: " + e.toString(), e);
+// }
+// }
+
+public void processIrusStats() throws Exception {
 Statement stmt = ConnectDB.getConnection().createStatement();
 ConnectDB.getConnection().setAutoCommit(false);

+System.out.println("====> Adding JSON Serde jar");
+stmt.executeUpdate("add jar /usr/share/cmf/common_jars/hive-hcatalog-core-1.1.0-cdh5.14.0.jar");
+System.out.println("====> Added JSON Serde jar");
+
+System.out.println("====> Dropping sushilogtmp_json table");
+String drop_sushilogtmp_json = "DROP TABLE IF EXISTS " +
+ConnectDB.getUsageStatsDBSchema() +
+".sushilogtmp_json";
+stmt.executeUpdate(drop_sushilogtmp_json);
+System.out.println("====> Dropped sushilogtmp_json table");
+
+System.out.println("====> Creating sushilogtmp_json table");
+String create_sushilogtmp_json = "CREATE EXTERNAL TABLE IF NOT EXISTS " +
+ConnectDB.getUsageStatsDBSchema() + ".sushilogtmp_json(\n" +
+" `ItemIdentifier` ARRAY<\n" +
+" struct<\n" +
+" Type: STRING,\n" +
+" Value: STRING\n" +
+" >\n" +
+" >,\n" +
+" `ItemPerformance` ARRAY<\n" +
+" struct<\n" +
+" `Period`: struct<\n" +
+" `Begin`: STRING,\n" +
+" `End`: STRING\n" +
+" >,\n" +
+" `Instance`: struct<\n" +
+" `Count`: STRING,\n" +
+" `MetricType`: STRING\n" +
+" >\n" +
+" >\n" +
+" >\n" +
+")\n" +
+"ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe'\n" +
+"LOCATION '" + UsageStatsExporter.irusUKReportPath + "'\n" +
+"TBLPROPERTIES (\"transactional\"=\"false\")";
+stmt.executeUpdate(create_sushilogtmp_json);
+System.out.println("====> Created sushilogtmp_json table");
+
+System.out.println("====> Dropping sushilogtmp table");
+String drop_sushilogtmp = "DROP TABLE IF EXISTS " +
+ConnectDB.getUsageStatsDBSchema() +
+".sushilogtmp";
+stmt.executeUpdate(drop_sushilogtmp);
+System.out.println("====> Dropped sushilogtmp table");
+
+System.out.println("====> Creating sushilogtmp table");
+String create_sushilogtmp = "CREATE TABLE " + ConnectDB.getUsageStatsDBSchema()
++ ".sushilogtmp(source STRING, repository STRING, " +
+"rid STRING, date STRING, metric_type STRING, count INT) clustered by (source) into 100 buckets stored as orc "
++
+"tblproperties('transactional'='true')";
+stmt.executeUpdate(create_sushilogtmp);
+System.out.println("====> Created sushilogtmp table");
+
+System.out.println("====> Inserting to sushilogtmp table");
+String insert_sushilogtmp = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".sushilogtmp " +
+"SELECT 'IRUS-UK', 'opendoar____::', `ItemIdent`.`Value`, `ItemPerf`.`Period`.`Begin`, " +
+"`ItemPerf`.`Instance`.`MetricType`, `ItemPerf`.`Instance`.`Count` " +
+"FROM " + ConnectDB.getUsageStatsDBSchema() + ".sushilogtmp_json " +
+"LATERAL VIEW posexplode(ItemIdentifier) ItemIdentifierTable AS seqi, ItemIdent " +
+"LATERAL VIEW posexplode(ItemPerformance) ItemPerformanceTable AS seqp, ItemPerf " +
+"WHERE `ItemIdent`.`Type`= 'OAI'";
+stmt.executeUpdate(insert_sushilogtmp);
+System.out.println("====> Inserted to sushilogtmp table");
+
+ConnectDB.getConnection().close();
+System.exit(0);
+
 // String sql = "INSERT INTO sushi_result_downloads SELECT s.source, d.id AS repository, ro.id, s.date, s.count
 // FROM sushilog s, datasource_oids d, result_oids ro WHERE s.repository=d.orid AND s.oai=ro.orid AND
 // metric_type='ft_total'";

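Note on the INSERT added above: the external sushilogtmp_json table maps each IRUS-UK report item onto nested arrays of structs, and LATERAL VIEW posexplode() is what unrolls those arrays into one flat row per identifier/performance pair before the values are written into the ORC-backed sushilogtmp table. Below is a minimal, self-contained sketch of that flattening pattern over JDBC; the HiveServer2 URL and the table and column names (report_json, usage_flat) are illustrative placeholders, not the project's real ones.

    import java.sql.Connection;
    import java.sql.DriverManager;
    import java.sql.Statement;

    public class PosexplodeSketch {
        public static void main(String[] args) throws Exception {
            // Hypothetical HiveServer2 endpoint; the real code goes through ConnectDB instead.
            Connection conn = DriverManager.getConnection("jdbc:hive2://localhost:10000/default");
            Statement stmt = conn.createStatement();
            // posexplode() emits one row per array element (plus its position), so a report item
            // with N ItemPerformance entries becomes N flat (identifier, period, metric, count) rows.
            stmt.executeUpdate(
                "INSERT INTO usage_flat " +
                "SELECT id.`Value`, perf.`Period`.`Begin`, perf.`Instance`.`MetricType`, perf.`Instance`.`Count` " +
                "FROM report_json " +
                "LATERAL VIEW posexplode(ItemIdentifier) idTable AS idPos, id " +
                "LATERAL VIEW posexplode(ItemPerformance) perfTable AS perfPos, perf " +
                "WHERE id.`Type` = 'OAI'");
            stmt.close();
            conn.close();
        }
    }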
@@ -132,7 +202,6 @@ public class IrusStats {
 sql = "Insert into sushilog select * from sushilogtmp;";
 stmt.executeUpdate(sql);

-ConnectDB.getConnection().commit();
 ConnectDB.getConnection().close();
 }

@@ -172,7 +241,7 @@ public class IrusStats {
 }
 // break;
 }

 System.out.println("====> (processIrusRRReport) Finished with report: " + reportUrl);
 }

@@ -254,7 +323,7 @@ public class IrusStats {

 preparedStatement.executeBatch();
 ConnectDB.getConnection().close();

 System.out.println("====> (processIrusIRReport) Finished downloading report(s) with opendoar: " + opendoar);
 }

@@ -191,7 +191,7 @@ public class PiwikStatsDB {
 System.out.println("====> DownloadsStats processing starts");

 System.out.println("====> Processing portal logs");
-// processPortalLog();
+processPortalLog();
 System.out.println("====> Portal logs process done");
 log.info("portal process done");

@@ -717,6 +717,10 @@ public class PiwikStatsDB {
 Statement stmt = ConnectDB.getConnection().createStatement();
 ConnectDB.getConnection().setAutoCommit(false);

+System.out.println("====> Adding JSON Serde jar");
+stmt.executeUpdate("add jar /usr/share/cmf/common_jars/hive-hcatalog-core-1.1.0-cdh5.14.0.jar");
+System.out.println("====> Added JSON Serde jar");
+
 System.out.println("====> Dropping process_portal_log_tmp_json table");
 String drop_process_portal_log_tmp_json = "DROP TABLE IF EXISTS " +
 ConnectDB.getUsageStatsDBSchema() +

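A note on the add jar statements that this commit now issues at the start of processIrusStats() and of the portal-log processing above: in Hive, add jar only registers a resource for the current session, so every method that opens its own connection and queries the JSON-backed tables has to register the SerDe jar again (the next hunk removes the single central registration that UsageStatsExporter used to perform). Below is a minimal sketch of that per-session setup; the JDBC URL is an illustrative placeholder, while the jar path is the one used in the diff.

    import java.sql.Connection;
    import java.sql.DriverManager;
    import java.sql.Statement;

    public class JsonSerdeSession {
        // Opens a Hive session and registers the HCatalog JsonSerDe for it.
        // The registration is session-scoped: it must be repeated for every new connection.
        public static Statement openStatementWithJsonSerde(String jdbcUrl) throws Exception {
            Connection conn = DriverManager.getConnection(jdbcUrl); // e.g. "jdbc:hive2://hive-host:10000/default"
            Statement stmt = conn.createStatement();
            stmt.executeUpdate("add jar /usr/share/cmf/common_jars/hive-hcatalog-core-1.1.0-cdh5.14.0.jar");
            return stmt;
        }
    }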
@@ -44,14 +44,6 @@ public class UsageStatsExporter {
 // piwd.GetOpenAIRELogs(repoLogPath, portalLogPath, portalMatomoID);
 System.out.println("====> Downloaded logs");

-// Adding JSON Serde jar needed for creating tables over JSON files
-Statement stmt = ConnectDB.getConnection().createStatement();
-ConnectDB.getConnection().setAutoCommit(false);
-System.out.println("====> Adding JSON Serde jar");
-stmt.executeUpdate("add jar /usr/share/cmf/common_jars/hive-hcatalog-core-1.1.0-cdh5.14.0.jar");
-System.out.println("====> Added JSON Serde jar");
-stmt.close();
-
 // Create DB tables, insert/update statistics
 // String cRobotsUrl = properties.getProperty("COUNTER_robots_Url");
 String cRobotsUrl = "https://raw.githubusercontent.com/atmire/COUNTER-Robots/master/COUNTER_Robots_list.json";

@@ -61,15 +53,15 @@ public class UsageStatsExporter {
 log.info("process logs done");

 IrusStats irusstats = new IrusStats(irusUKBaseURL);
-// irusstats.processIrusRRReport(irusUKReportPath);
+// irusstats.getIrusRRReport(irusUKReportPath);

-irusstats.irusStats();
+irusstats.processIrusStats();
 // log.info("irus done");

 System.exit(0);

-SarcStats sarcStats = new SarcStats();
-sarcStats.processSarc(sarcsReportPath);
+// SarcStats sarcStats = new SarcStats();
+// sarcStats.processSarc(sarcsReportPath);
 // sarcStats.sarcStats();
 log.info("sarc done");

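Taken together, the exporter's IRUS-UK flow after this commit reduces to the short sequence sketched below. This is a sketch only: it assumes IrusStats is visible from the calling package and that irusUKBaseURL and irusUKReportPath are supplied by the surrounding exporter code; the report download call is still commented out at this point, and the System.exit(0) above stops execution before the (now commented-out) SARC steps.

    public class IrusFlowSketch {
        public static void run(String irusUKBaseURL, String irusUKReportPath) throws Exception {
            IrusStats irusstats = new IrusStats(irusUKBaseURL);
            // irusstats.getIrusRRReport(irusUKReportPath); // report download, still disabled in this commit
            irusstats.processIrusStats();                   // flatten the downloaded JSON reports into sushilogtmp
        }
    }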