forked from D-Net/dnet-hadoop
Downloading Irus reports works correctly. We still need to add a limit on the number of downloaded files for testing purposes. Now we have breakpoints.
This commit is contained in:
parent
196946cd6b
commit
95fee808fd
|
@ -43,25 +43,36 @@ public class IrusStats {
|
|||
|
||||
public IrusStats(String irusUKURL) throws Exception {
|
||||
this.irusUKURL = irusUKURL;
|
||||
// createTables();
|
||||
System.out.println("====> Creating Irus Stats tables");
|
||||
createTables();
|
||||
System.out.println("====> Created Irus Stats tables");
|
||||
// The following may not be needed - It will be created when JSON tables are created
|
||||
// createTmpTables();
|
||||
}
|
||||
|
||||
private void createTables() throws Exception {
|
||||
try {
|
||||
|
||||
System.out.println("====> Creating sushilog");
|
||||
Statement stmt = ConnectDB.getConnection().createStatement();
|
||||
String sqlCreateTableSushiLog = "CREATE TABLE IF NOT EXISTS sushilog(source TEXT, repository TEXT, rid TEXT, date TEXT, metric_type TEXT, count INT, PRIMARY KEY(source, repository, rid, date, metric_type));";
|
||||
String sqlCreateTableSushiLog = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema()
|
||||
+ ".sushilog(source STRING, " +
|
||||
"repository STRING, rid STRING, date STRING, metric_type STRING, count INT) clustered by (source, " +
|
||||
"repository, rid, date, metric_type) into 100 buckets stored as orc tblproperties('transactional'='true')";
|
||||
stmt.executeUpdate(sqlCreateTableSushiLog);
|
||||
String sqlcreateRuleSushiLog = "CREATE OR REPLACE RULE ignore_duplicate_inserts AS "
|
||||
+ " ON INSERT TO sushilog "
|
||||
+ " WHERE (EXISTS ( SELECT sushilog.source, sushilog.repository,"
|
||||
+ "sushilog.rid, sushilog.date "
|
||||
+ "FROM sushilog "
|
||||
+ "WHERE sushilog.source = new.source AND sushilog.repository = new.repository AND sushilog.rid = new.rid AND sushilog.date = new.date AND sushilog.metric_type = new.metric_type)) DO INSTEAD NOTHING;";
|
||||
stmt.executeUpdate(sqlcreateRuleSushiLog);
|
||||
String createSushiIndex = "create index if not exists sushilog_duplicates on sushilog(source, repository, rid, date, metric_type);";
|
||||
stmt.executeUpdate(createSushiIndex);
|
||||
System.out.println("====> Created sushilog");
|
||||
|
||||
// To see how to apply to the ignore duplicate rules and indexes
|
||||
// stmt.executeUpdate(sqlCreateTableSushiLog);
|
||||
// String sqlcreateRuleSushiLog = "CREATE OR REPLACE RULE ignore_duplicate_inserts AS "
|
||||
// + " ON INSERT TO sushilog "
|
||||
// + " WHERE (EXISTS ( SELECT sushilog.source, sushilog.repository,"
|
||||
// + "sushilog.rid, sushilog.date "
|
||||
// + "FROM sushilog "
|
||||
// + "WHERE sushilog.source = new.source AND sushilog.repository = new.repository AND sushilog.rid = new.rid AND sushilog.date = new.date AND sushilog.metric_type = new.metric_type)) DO INSTEAD NOTHING;";
|
||||
// stmt.executeUpdate(sqlcreateRuleSushiLog);
|
||||
// String createSushiIndex = "create index if not exists sushilog_duplicates on sushilog(source, repository, rid, date, metric_type);";
|
||||
// stmt.executeUpdate(createSushiIndex);
|
||||
|
||||
stmt.close();
|
||||
ConnectDB.getConnection().close();
|
||||
|
@ -72,6 +83,7 @@ public class IrusStats {
|
|||
}
|
||||
}
|
||||
|
||||
// The following may not be needed - It will be created when JSON tables are created
|
||||
private void createTmpTables() throws Exception {
|
||||
try {
|
||||
|
||||
|
@ -126,19 +138,16 @@ public class IrusStats {
|
|||
|
||||
public void processIrusRRReport(String irusUKReportPath) throws Exception {
|
||||
SimpleDateFormat simpleDateFormat = new SimpleDateFormat("YYYY-MM");
|
||||
// String reportUrl = "https://irus.jisc.ac.uk" +
|
||||
// "/api/sushilite/v1_7/GetReport/?Report=RR1&Release=4&RequestorID=OpenAIRE&BeginDate=2016-01&EndDate=" +
|
||||
// simpleDateFormat.format(new Date()) +
|
||||
// "&RepositoryIdentifier=&ItemDataType=&NewJiscBand=&Granularity=Monthly&Callback=";
|
||||
String reportUrl = irusUKURL + "GetReport/?Report=RR1&Release=4&RequestorID=OpenAIRE&BeginDate=2016-01&EndDate="
|
||||
+ simpleDateFormat.format(new Date())
|
||||
+ "&RepositoryIdentifier=&ItemDataType=&NewJiscBand=&Granularity=Monthly&Callback=";
|
||||
|
||||
System.out.println("====> (processIrusRRReport) Getting report: " + reportUrl);
|
||||
log.info("Getting Irus report: " + reportUrl);
|
||||
|
||||
String text = getJson(reportUrl, "", "");
|
||||
|
||||
log.info("Report: " + text);
|
||||
// log.info("Report: " + text);
|
||||
|
||||
JSONParser parser = new JSONParser();
|
||||
JSONObject jsonObject = (JSONObject) parser.parse(text);
|
||||
|
@ -163,10 +172,14 @@ public class IrusStats {
|
|||
}
|
||||
// break;
|
||||
}
|
||||
|
||||
System.out.println("====> (processIrusRRReport) Finished with report: " + reportUrl);
|
||||
}
|
||||
|
||||
private void processIrusIRReport(String opendoar, String irusUKReportPath) throws Exception {
|
||||
System.out.println(opendoar);
|
||||
|
||||
System.out.println("====> (processIrusIRReport) Getting report(s) with opendoar: " + opendoar);
|
||||
|
||||
ConnectDB.getConnection().setAutoCommit(false);
|
||||
|
||||
SimpleDateFormat simpleDateFormat = new SimpleDateFormat("YYYY-MM");
|
||||
|
@ -182,7 +195,8 @@ public class IrusStats {
|
|||
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
|
||||
PreparedStatement st = ConnectDB
|
||||
.getConnection()
|
||||
.prepareStatement("SELECT max(date) FROM usagestats_13.sushilog WHERE repository=?");
|
||||
.prepareStatement(
|
||||
"SELECT max(date) FROM " + ConnectDB.getUsageStatsDBSchema() + ".sushilog WHERE repository=?");
|
||||
st.setString(1, "opendoar____::" + opendoar);
|
||||
ResultSet rs_date = st.executeQuery();
|
||||
while (rs_date.next()) {
|
||||
|
@ -236,65 +250,12 @@ public class IrusStats {
|
|||
}
|
||||
|
||||
fin.close();
|
||||
|
||||
// JSONParser parser = new JSONParser();
|
||||
// JSONObject jsonObject = (JSONObject) parser.parse(text);
|
||||
// jsonObject = (JSONObject) jsonObject.get("ReportResponse");
|
||||
// jsonObject = (JSONObject) jsonObject.get("Report");
|
||||
// jsonObject = (JSONObject) jsonObject.get("Report");
|
||||
// jsonObject = (JSONObject) jsonObject.get("Customer");
|
||||
// JSONArray jsonArray = (JSONArray) jsonObject.get("ReportItems");
|
||||
// if (jsonArray == null) {
|
||||
// continue;
|
||||
// }
|
||||
// String oai = "";
|
||||
// for (Object aJsonArray : jsonArray) {
|
||||
// JSONObject jsonObjectRow = (JSONObject) aJsonArray;
|
||||
// JSONArray itemIdentifier = (JSONArray) jsonObjectRow.get("ItemIdentifier");
|
||||
// for (Object identifier : itemIdentifier) {
|
||||
// JSONObject oaiPmh = (JSONObject) identifier;
|
||||
// if (oaiPmh.get("Type").toString().equals("OAI")) {
|
||||
// oai = oaiPmh.get("Value").toString();
|
||||
// // System.out.println("OAI: " + oai);
|
||||
// break;
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// JSONArray itemPerformance = (JSONArray) jsonObjectRow.get("ItemPerformance");
|
||||
// String period;
|
||||
// String type;
|
||||
// String count;
|
||||
// for (Object perf : itemPerformance) {
|
||||
// JSONObject performance = (JSONObject) perf;
|
||||
// JSONObject periodObj = (JSONObject) performance.get("Period");
|
||||
// period = periodObj.get("Begin").toString();
|
||||
// JSONObject instanceObj = (JSONObject) performance.get("Instance");
|
||||
// type = instanceObj.get("MetricType").toString();
|
||||
// count = instanceObj.get("Count").toString();
|
||||
// // System.out.println(oai + " : " + period + " : " + count);
|
||||
//
|
||||
// preparedStatement.setString(1, "IRUS-UK");
|
||||
// preparedStatement.setString(2, "opendoar____::" + opendoar);
|
||||
// preparedStatement.setString(3, oai);
|
||||
// preparedStatement.setString(4, period);
|
||||
// preparedStatement.setString(5, type);
|
||||
// preparedStatement.setInt(6, Integer.parseInt(count));
|
||||
// preparedStatement.addBatch();
|
||||
// batch_size++;
|
||||
// if (batch_size == 10000) {
|
||||
// preparedStatement.executeBatch();
|
||||
// ConnectDB.getConnection().commit();
|
||||
// batch_size = 0;
|
||||
// }
|
||||
// }
|
||||
// // break;
|
||||
// }
|
||||
// break;
|
||||
}
|
||||
|
||||
preparedStatement.executeBatch();
|
||||
ConnectDB.getConnection().commit();
|
||||
ConnectDB.getConnection().close();
|
||||
|
||||
System.out.println("====> (processIrusIRReport) Finished downloading report(s) with opendoar: " + opendoar);
|
||||
}
|
||||
|
||||
private String getJson(String url) throws Exception {
|
||||
|
|
|
@ -202,7 +202,7 @@ public class PiwikStatsDB {
|
|||
System.out.println("====> Portal usagestats process done");
|
||||
|
||||
System.out.println("====> Updating Production Tables");
|
||||
// updateProdTables();
|
||||
updateProdTables();
|
||||
System.out.println("====> Updated Production Tables");
|
||||
log.info("updateProdTables done");
|
||||
|
||||
|
|
|
@ -51,23 +51,23 @@ public class UsageStatsExporter {
|
|||
stmt.executeUpdate("add jar /usr/share/cmf/common_jars/hive-hcatalog-core-1.1.0-cdh5.14.0.jar");
|
||||
System.out.println("====> Added JSON Serde jar");
|
||||
stmt.close();
|
||||
|
||||
|
||||
// Create DB tables, insert/update statistics
|
||||
// String cRobotsUrl = properties.getProperty("COUNTER_robots_Url");
|
||||
String cRobotsUrl = "https://raw.githubusercontent.com/atmire/COUNTER-Robots/master/COUNTER_Robots_list.json";
|
||||
piwikstatsdb.setCounterRobotsURL(cRobotsUrl);
|
||||
System.out.println("====> Processing logs");
|
||||
piwikstatsdb.processLogs();
|
||||
// piwikstatsdb.processLogs();
|
||||
log.info("process logs done");
|
||||
|
||||
System.exit(0);
|
||||
|
||||
IrusStats irusstats = new IrusStats(irusUKBaseURL);
|
||||
irusstats.processIrusRRReport(irusUKReportPath);
|
||||
|
||||
// irusstats.irusStats();
|
||||
// log.info("irus done");
|
||||
//
|
||||
|
||||
System.exit(0);
|
||||
|
||||
SarcStats sarcStats = new SarcStats();
|
||||
sarcStats.processSarc(sarcsReportPath);
|
||||
// sarcStats.sarcStats();
|
||||
|
|
Loading…
Reference in New Issue