Downloading IRUS reports works correctly. A limit on the number of downloaded files still needs to be added for testing purposes; for now we have breakpoints.

This commit is contained in:
Spyros Zoupanos 2020-09-13 14:51:45 +03:00
parent 196946cd6b
commit 95fee808fd
3 changed files with 40 additions and 79 deletions

View File

@ -43,25 +43,36 @@ public class IrusStats {
public IrusStats(String irusUKURL) throws Exception {
this.irusUKURL = irusUKURL;
// createTables();
System.out.println("====> Creating Irus Stats tables");
createTables();
System.out.println("====> Created Irus Stats tables");
// The following may not be needed - It will be created when JSON tables are created
// createTmpTables();
}
private void createTables() throws Exception {
try {
System.out.println("====> Creating sushilog");
Statement stmt = ConnectDB.getConnection().createStatement();
String sqlCreateTableSushiLog = "CREATE TABLE IF NOT EXISTS sushilog(source TEXT, repository TEXT, rid TEXT, date TEXT, metric_type TEXT, count INT, PRIMARY KEY(source, repository, rid, date, metric_type));";
String sqlCreateTableSushiLog = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema()
+ ".sushilog(source STRING, " +
"repository STRING, rid STRING, date STRING, metric_type STRING, count INT) clustered by (source, " +
"repository, rid, date, metric_type) into 100 buckets stored as orc tblproperties('transactional'='true')";
stmt.executeUpdate(sqlCreateTableSushiLog);
String sqlcreateRuleSushiLog = "CREATE OR REPLACE RULE ignore_duplicate_inserts AS "
+ " ON INSERT TO sushilog "
+ " WHERE (EXISTS ( SELECT sushilog.source, sushilog.repository,"
+ "sushilog.rid, sushilog.date "
+ "FROM sushilog "
+ "WHERE sushilog.source = new.source AND sushilog.repository = new.repository AND sushilog.rid = new.rid AND sushilog.date = new.date AND sushilog.metric_type = new.metric_type)) DO INSTEAD NOTHING;";
stmt.executeUpdate(sqlcreateRuleSushiLog);
String createSushiIndex = "create index if not exists sushilog_duplicates on sushilog(source, repository, rid, date, metric_type);";
stmt.executeUpdate(createSushiIndex);
System.out.println("====> Created sushilog");
// To see how to apply to the ignore duplicate rules and indexes
// stmt.executeUpdate(sqlCreateTableSushiLog);
// String sqlcreateRuleSushiLog = "CREATE OR REPLACE RULE ignore_duplicate_inserts AS "
// + " ON INSERT TO sushilog "
// + " WHERE (EXISTS ( SELECT sushilog.source, sushilog.repository,"
// + "sushilog.rid, sushilog.date "
// + "FROM sushilog "
// + "WHERE sushilog.source = new.source AND sushilog.repository = new.repository AND sushilog.rid = new.rid AND sushilog.date = new.date AND sushilog.metric_type = new.metric_type)) DO INSTEAD NOTHING;";
// stmt.executeUpdate(sqlcreateRuleSushiLog);
// String createSushiIndex = "create index if not exists sushilog_duplicates on sushilog(source, repository, rid, date, metric_type);";
// stmt.executeUpdate(createSushiIndex);
stmt.close();
ConnectDB.getConnection().close();
@ -72,6 +83,7 @@ public class IrusStats {
}
}
// The following may not be needed - It will be created when JSON tables are created
private void createTmpTables() throws Exception {
try {
@ -126,19 +138,16 @@ public class IrusStats {
public void processIrusRRReport(String irusUKReportPath) throws Exception {
SimpleDateFormat simpleDateFormat = new SimpleDateFormat("YYYY-MM");
// String reportUrl = "https://irus.jisc.ac.uk" +
// "/api/sushilite/v1_7/GetReport/?Report=RR1&Release=4&RequestorID=OpenAIRE&BeginDate=2016-01&EndDate=" +
// simpleDateFormat.format(new Date()) +
// "&RepositoryIdentifier=&ItemDataType=&NewJiscBand=&Granularity=Monthly&Callback=";
String reportUrl = irusUKURL + "GetReport/?Report=RR1&Release=4&RequestorID=OpenAIRE&BeginDate=2016-01&EndDate="
+ simpleDateFormat.format(new Date())
+ "&RepositoryIdentifier=&ItemDataType=&NewJiscBand=&Granularity=Monthly&Callback=";
System.out.println("====> (processIrusRRReport) Getting report: " + reportUrl);
log.info("Getting Irus report: " + reportUrl);
String text = getJson(reportUrl, "", "");
log.info("Report: " + text);
// log.info("Report: " + text);
JSONParser parser = new JSONParser();
JSONObject jsonObject = (JSONObject) parser.parse(text);
@ -163,10 +172,14 @@ public class IrusStats {
}
// break;
}
System.out.println("====> (processIrusRRReport) Finished with report: " + reportUrl);
}
private void processIrusIRReport(String opendoar, String irusUKReportPath) throws Exception {
System.out.println(opendoar);
System.out.println("====> (processIrusIRReport) Getting report(s) with opendoar: " + opendoar);
ConnectDB.getConnection().setAutoCommit(false);
SimpleDateFormat simpleDateFormat = new SimpleDateFormat("YYYY-MM");
@ -182,7 +195,8 @@ public class IrusStats {
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
PreparedStatement st = ConnectDB
.getConnection()
.prepareStatement("SELECT max(date) FROM usagestats_13.sushilog WHERE repository=?");
.prepareStatement(
"SELECT max(date) FROM " + ConnectDB.getUsageStatsDBSchema() + ".sushilog WHERE repository=?");
st.setString(1, "opendoar____::" + opendoar);
ResultSet rs_date = st.executeQuery();
while (rs_date.next()) {
@ -236,65 +250,12 @@ public class IrusStats {
}
fin.close();
// JSONParser parser = new JSONParser();
// JSONObject jsonObject = (JSONObject) parser.parse(text);
// jsonObject = (JSONObject) jsonObject.get("ReportResponse");
// jsonObject = (JSONObject) jsonObject.get("Report");
// jsonObject = (JSONObject) jsonObject.get("Report");
// jsonObject = (JSONObject) jsonObject.get("Customer");
// JSONArray jsonArray = (JSONArray) jsonObject.get("ReportItems");
// if (jsonArray == null) {
// continue;
// }
// String oai = "";
// for (Object aJsonArray : jsonArray) {
// JSONObject jsonObjectRow = (JSONObject) aJsonArray;
// JSONArray itemIdentifier = (JSONArray) jsonObjectRow.get("ItemIdentifier");
// for (Object identifier : itemIdentifier) {
// JSONObject oaiPmh = (JSONObject) identifier;
// if (oaiPmh.get("Type").toString().equals("OAI")) {
// oai = oaiPmh.get("Value").toString();
// // System.out.println("OAI: " + oai);
// break;
// }
// }
//
// JSONArray itemPerformance = (JSONArray) jsonObjectRow.get("ItemPerformance");
// String period;
// String type;
// String count;
// for (Object perf : itemPerformance) {
// JSONObject performance = (JSONObject) perf;
// JSONObject periodObj = (JSONObject) performance.get("Period");
// period = periodObj.get("Begin").toString();
// JSONObject instanceObj = (JSONObject) performance.get("Instance");
// type = instanceObj.get("MetricType").toString();
// count = instanceObj.get("Count").toString();
// // System.out.println(oai + " : " + period + " : " + count);
//
// preparedStatement.setString(1, "IRUS-UK");
// preparedStatement.setString(2, "opendoar____::" + opendoar);
// preparedStatement.setString(3, oai);
// preparedStatement.setString(4, period);
// preparedStatement.setString(5, type);
// preparedStatement.setInt(6, Integer.parseInt(count));
// preparedStatement.addBatch();
// batch_size++;
// if (batch_size == 10000) {
// preparedStatement.executeBatch();
// ConnectDB.getConnection().commit();
// batch_size = 0;
// }
// }
// // break;
// }
// break;
}
preparedStatement.executeBatch();
ConnectDB.getConnection().commit();
ConnectDB.getConnection().close();
System.out.println("====> (processIrusIRReport) Finished downloading report(s) with opendoar: " + opendoar);
}
private String getJson(String url) throws Exception {

View File

@ -202,7 +202,7 @@ public class PiwikStatsDB {
System.out.println("====> Portal usagestats process done");
System.out.println("====> Updating Production Tables");
// updateProdTables();
updateProdTables();
System.out.println("====> Updated Production Tables");
log.info("updateProdTables done");

View File

@ -51,23 +51,23 @@ public class UsageStatsExporter {
stmt.executeUpdate("add jar /usr/share/cmf/common_jars/hive-hcatalog-core-1.1.0-cdh5.14.0.jar");
System.out.println("====> Added JSON Serde jar");
stmt.close();
// Create DB tables, insert/update statistics
// String cRobotsUrl = properties.getProperty("COUNTER_robots_Url");
String cRobotsUrl = "https://raw.githubusercontent.com/atmire/COUNTER-Robots/master/COUNTER_Robots_list.json";
piwikstatsdb.setCounterRobotsURL(cRobotsUrl);
System.out.println("====> Processing logs");
piwikstatsdb.processLogs();
// piwikstatsdb.processLogs();
log.info("process logs done");
System.exit(0);
IrusStats irusstats = new IrusStats(irusUKBaseURL);
irusstats.processIrusRRReport(irusUKReportPath);
// irusstats.irusStats();
// log.info("irus done");
//
System.exit(0);
SarcStats sarcStats = new SarcStats();
sarcStats.processSarc(sarcsReportPath);
// sarcStats.sarcStats();