Changes to download Irus Stats

This commit is contained in:
Spyros Zoupanos 2020-07-22 19:22:04 +03:00
parent 4c00343bbd
commit c035fa7648
3 changed files with 129 additions and 204 deletions

View File

@ -20,6 +20,10 @@ import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.log4j.Logger;
import org.json.simple.JSONArray;
import org.json.simple.JSONObject;
@ -39,8 +43,8 @@ public class IrusStats {
public IrusStats(String irusUKURL) throws Exception {
this.irusUKURL = irusUKURL;
createTables();
createTmpTables();
// createTables();
// createTmpTables();
}
private void createTables() throws Exception {
@ -120,7 +124,7 @@ public class IrusStats {
ConnectDB.getConnection().close();
}
public void processIrusRRReport() throws Exception {
public void processIrusRRReport(String irusUKReportPath) throws Exception {
SimpleDateFormat simpleDateFormat = new SimpleDateFormat("YYYY-MM");
// String reportUrl = "https://irus.jisc.ac.uk" +
// "/api/sushilite/v1_7/GetReport/?Report=RR1&Release=4&RequestorID=OpenAIRE&BeginDate=2016-01&EndDate=" +
@ -153,7 +157,7 @@ public class IrusStats {
// System.out.println(i + ": " + opendoar.get("Value").toString());
log.info(i + ": " + opendoar.get("Value").toString());
i++;
processIrusIRReport(opendoar.get("Value").toString());
processIrusIRReport(opendoar.get("Value").toString(), irusUKReportPath);
break;
}
}
@ -161,7 +165,7 @@ public class IrusStats {
}
}
private void processIrusIRReport(String opendoar) throws Exception {
private void processIrusIRReport(String opendoar, String irusUKReportPath) throws Exception {
System.out.println(opendoar);
ConnectDB.getConnection().setAutoCommit(false);
@ -178,7 +182,7 @@ public class IrusStats {
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
PreparedStatement st = ConnectDB
.getConnection()
.prepareStatement("SELECT max(date) FROM sushilog WHERE repository=?;");
.prepareStatement("SELECT max(date) FROM usagestats_13.sushilog WHERE repository=?");
st.setString(1, "opendoar____::" + opendoar);
ResultSet rs_date = st.executeQuery();
while (rs_date.next()) {
@ -202,11 +206,18 @@ public class IrusStats {
+ "&ItemIdentifier=&ItemDataType=&hasDOI=&Granularity=Monthly&Callback=";
start.add(Calendar.MONTH, 1);
System.out.println("Downloading file: " + reportUrl);
String text = getJson(reportUrl, "", "");
if (text == null) {
continue;
}
FileSystem fs = FileSystem.get(new Configuration());
String filePath = irusUKReportPath + "/" + "IrusIRReport_" +
opendoar + "_" + simpleDateFormat.format(start.getTime()) + ".json";
System.out.println("Storing to file: " + filePath);
FSDataOutputStream fin = fs.create(new Path(filePath), true);
JSONParser parser = new JSONParser();
JSONObject jsonObject = (JSONObject) parser.parse(text);
jsonObject = (JSONObject) jsonObject.get("ReportResponse");
@ -220,45 +231,64 @@ public class IrusStats {
String oai = "";
for (Object aJsonArray : jsonArray) {
JSONObject jsonObjectRow = (JSONObject) aJsonArray;
JSONArray itemIdentifier = (JSONArray) jsonObjectRow.get("ItemIdentifier");
for (Object identifier : itemIdentifier) {
JSONObject oaiPmh = (JSONObject) identifier;
if (oaiPmh.get("Type").toString().equals("OAI")) {
oai = oaiPmh.get("Value").toString();
// System.out.println("OAI: " + oai);
break;
}
}
JSONArray itemPerformance = (JSONArray) jsonObjectRow.get("ItemPerformance");
String period;
String type;
String count;
for (Object perf : itemPerformance) {
JSONObject performance = (JSONObject) perf;
JSONObject periodObj = (JSONObject) performance.get("Period");
period = periodObj.get("Begin").toString();
JSONObject instanceObj = (JSONObject) performance.get("Instance");
type = instanceObj.get("MetricType").toString();
count = instanceObj.get("Count").toString();
// System.out.println(oai + " : " + period + " : " + count);
preparedStatement.setString(1, "IRUS-UK");
preparedStatement.setString(2, "opendoar____::" + opendoar);
preparedStatement.setString(3, oai);
preparedStatement.setString(4, period);
preparedStatement.setString(5, type);
preparedStatement.setInt(6, Integer.parseInt(count));
preparedStatement.addBatch();
batch_size++;
if (batch_size == 10000) {
preparedStatement.executeBatch();
ConnectDB.getConnection().commit();
batch_size = 0;
}
}
// break;
fin.write(jsonObjectRow.toJSONString().getBytes());
fin.writeChar('\n');
}
fin.close();
// JSONParser parser = new JSONParser();
// JSONObject jsonObject = (JSONObject) parser.parse(text);
// jsonObject = (JSONObject) jsonObject.get("ReportResponse");
// jsonObject = (JSONObject) jsonObject.get("Report");
// jsonObject = (JSONObject) jsonObject.get("Report");
// jsonObject = (JSONObject) jsonObject.get("Customer");
// JSONArray jsonArray = (JSONArray) jsonObject.get("ReportItems");
// if (jsonArray == null) {
// continue;
// }
// String oai = "";
// for (Object aJsonArray : jsonArray) {
// JSONObject jsonObjectRow = (JSONObject) aJsonArray;
// JSONArray itemIdentifier = (JSONArray) jsonObjectRow.get("ItemIdentifier");
// for (Object identifier : itemIdentifier) {
// JSONObject oaiPmh = (JSONObject) identifier;
// if (oaiPmh.get("Type").toString().equals("OAI")) {
// oai = oaiPmh.get("Value").toString();
// // System.out.println("OAI: " + oai);
// break;
// }
// }
//
// JSONArray itemPerformance = (JSONArray) jsonObjectRow.get("ItemPerformance");
// String period;
// String type;
// String count;
// for (Object perf : itemPerformance) {
// JSONObject performance = (JSONObject) perf;
// JSONObject periodObj = (JSONObject) performance.get("Period");
// period = periodObj.get("Begin").toString();
// JSONObject instanceObj = (JSONObject) performance.get("Instance");
// type = instanceObj.get("MetricType").toString();
// count = instanceObj.get("Count").toString();
// // System.out.println(oai + " : " + period + " : " + count);
//
// preparedStatement.setString(1, "IRUS-UK");
// preparedStatement.setString(2, "opendoar____::" + opendoar);
// preparedStatement.setString(3, oai);
// preparedStatement.setString(4, period);
// preparedStatement.setString(5, type);
// preparedStatement.setInt(6, Integer.parseInt(count));
// preparedStatement.addBatch();
// batch_size++;
// if (batch_size == 10000) {
// preparedStatement.executeBatch();
// ConnectDB.getConnection().commit();
// batch_size = 0;
// }
// }
// // break;
// }
// break;
}
@ -267,141 +297,32 @@ public class IrusStats {
ConnectDB.getConnection().close();
}
public void processIrusIRReport(String opendoar, String startDate) throws Exception {
ConnectDB.getConnection().setAutoCommit(false);
private String getJson(String url) throws Exception {
try {
System.out.println("===> Connecting to: " + url);
URL website = new URL(url);
System.out.println("Connection url -----> " + url);
URLConnection connection = website.openConnection();
SimpleDateFormat simpleDateFormat = new SimpleDateFormat("YYYY-MM");
Calendar start = Calendar.getInstance();
start.set(Calendar.YEAR, 2016);
start.set(Calendar.MONTH, Calendar.JANUARY);
// start.setTime(simpleDateFormat.parse("2016-01"));
Calendar end = Calendar.getInstance();
end.add(Calendar.DAY_OF_MONTH, -1);
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
start.setTime(sdf.parse(startDate));
String createTablesQuery = "-- Table: shadow.sushilog" + opendoar + "\n"
+ "\n"
+ "-- DROP TABLE shadow.sushilog" + opendoar + ";\n"
+ "\n"
+ "CREATE TABLE shadow.sushilog" + opendoar + "\n"
+ "(\n"
+ " source text COLLATE pg_catalog.\"default\" NOT NULL,\n"
+ " repository text COLLATE pg_catalog.\"default\" NOT NULL,\n"
+ " rid text COLLATE pg_catalog.\"default\" NOT NULL,\n"
+ " date text COLLATE pg_catalog.\"default\" NOT NULL,\n"
+ " metric_type text COLLATE pg_catalog.\"default\" NOT NULL,\n"
+ " count integer,\n"
+ " CONSTRAINT sushilog" + opendoar + "_pkey PRIMARY KEY (source, repository, rid, date, metric_type)\n"
+ " USING INDEX TABLESPACE index_storage\n"
+ ")\n"
+ "\n"
+ "TABLESPACE pg_default;\n"
+ "\n"
+ "ALTER TABLE shadow.sushilog" + opendoar + "\n"
+ " OWNER to sqoop;\n"
+ "\n"
+ "-- Rule: ignore_duplicate_inserts ON shadow.sushilog" + opendoar + "\n"
+ "\n"
+ "-- DROP Rule ignore_duplicate_inserts ON shadow.sushilog" + opendoar + ";\n"
+ "\n"
+ "CREATE OR REPLACE RULE ignore_duplicate_inserts AS\n"
+ " ON INSERT TO shadow.sushilog" + opendoar + "\n"
+ " WHERE (EXISTS ( SELECT sushilog" + opendoar + ".source,\n"
+ " sushilog" + opendoar + ".repository,\n"
+ " sushilog" + opendoar + ".rid,\n"
+ " sushilog" + opendoar + ".date\n"
+ " FROM sushilog" + opendoar + "\n"
+ " WHERE sushilog" + opendoar + ".source = new.source AND sushilog" + opendoar
+ ".repository = new.repository AND sushilog" + opendoar + ".rid = new.rid AND sushilog" + opendoar
+ ".date = new.date AND sushilog" + opendoar + ".metric_type = new.metric_type))\n"
+ " DO INSTEAD\n"
+ "NOTHING;";
Statement stCreateTables = ConnectDB.getConnection().createStatement();
stCreateTables.execute(createTablesQuery);
ConnectDB.getConnection().commit();
PreparedStatement preparedStatement = ConnectDB
.getConnection()
.prepareStatement(
"INSERT INTO sushilog" + opendoar
+ " (source, repository, rid, date, metric_type, count) VALUES (?,?,?,?,?,?)");
int batch_size = 0;
while (start.before(end)) {
// log.info("date: " + simpleDateFormat.format(start.getTime()));
String reportUrl = "https://irus.jisc.ac.uk/api/sushilite/v1_7/GetReport/?Report=IR1&Release=4&RequestorID=OpenAIRE&BeginDate="
+ simpleDateFormat.format(start.getTime()) + "&EndDate=2019-10-31&RepositoryIdentifier=opendoar%3A"
+ opendoar + "&ItemIdentifier=&ItemDataType=&hasDOI=&Granularity=Monthly&Callback=";
start.add(Calendar.MONTH, 1);
String text = getJson(reportUrl, "", "");
if (text == null) {
continue;
}
JSONParser parser = new JSONParser();
JSONObject jsonObject = (JSONObject) parser.parse(text);
jsonObject = (JSONObject) jsonObject.get("ReportResponse");
jsonObject = (JSONObject) jsonObject.get("Report");
jsonObject = (JSONObject) jsonObject.get("Report");
jsonObject = (JSONObject) jsonObject.get("Customer");
JSONArray jsonArray = (JSONArray) jsonObject.get("ReportItems");
if (jsonArray == null) {
continue;
}
String oai = "";
for (Object aJsonArray : jsonArray) {
JSONObject jsonObjectRow = (JSONObject) aJsonArray;
JSONArray itemIdentifier = (JSONArray) jsonObjectRow.get("ItemIdentifier");
for (Object identifier : itemIdentifier) {
JSONObject oaiPmh = (JSONObject) identifier;
if (oaiPmh.get("Type").toString().equals("OAI")) {
oai = oaiPmh.get("Value").toString();
// System.out.println("OAI: " + oai);
break;
}
// connection.setRequestProperty ("Authorization", "Basic "+encoded);
StringBuilder response;
try (BufferedReader in = new BufferedReader(new InputStreamReader(connection.getInputStream()))) {
response = new StringBuilder();
String inputLine;
while ((inputLine = in.readLine()) != null) {
response.append(inputLine);
// response.append("\n");
}
JSONArray itemPerformance = (JSONArray) jsonObjectRow.get("ItemPerformance");
String period;
String type;
String count;
for (Object perf : itemPerformance) {
JSONObject performance = (JSONObject) perf;
JSONObject periodObj = (JSONObject) performance.get("Period");
period = periodObj.get("Begin").toString();
JSONObject instanceObj = (JSONObject) performance.get("Instance");
type = instanceObj.get("MetricType").toString();
count = instanceObj.get("Count").toString();
// System.out.println(oai + " : " + period + " : " + count);
preparedStatement.setString(1, "IRUS-UK");
preparedStatement.setString(2, "opendoar____::" + opendoar);
preparedStatement.setString(3, oai);
preparedStatement.setString(4, period);
preparedStatement.setString(5, type);
preparedStatement.setInt(6, Integer.parseInt(count));
preparedStatement.addBatch();
batch_size++;
if (batch_size == 10000) {
preparedStatement.executeBatch();
ConnectDB.getConnection().commit();
batch_size = 0;
}
}
// break;
}
// break;
System.out.println("response ====> " + response.toString());
return response.toString();
} catch (Exception e) {
log.error("Failed to get URL: " + e);
System.out.println("Failed to get URL: " + e);
throw new Exception("Failed to get URL: " + e.toString(), e);
}
preparedStatement.executeBatch();
ConnectDB.getConnection().commit();
ConnectDB.getConnection().close();
}
private String getJson(String url, String username, String password) throws Exception {

View File

@ -27,7 +27,7 @@ public class SarcStats {
private final Logger log = Logger.getLogger(this.getClass());
public SarcStats() throws Exception {
createTables();
// createTables();
}
private void createTables() throws Exception {
@ -228,7 +228,7 @@ public class SarcStats {
ConnectDB.getConnection().close();
}
private String getJson(String url) {
private String getJson(String url) throws Exception {
// String cred=username+":"+password;
// String encoded = new sun.misc.BASE64Encoder().encode (cred.getBytes());
try {
@ -246,10 +246,11 @@ public class SarcStats {
}
return response.toString();
} catch (Exception e) {
log.error("Failed to get URL: " + e);
// System.out.println("Failed to get URL: " + e);
return null;
// throw new Exception("Failed to get URL: " + e.toString(), e);
System.out.println("Failed to get URL: " + e);
// return null;
throw new Exception("Failed to get URL: " + e.toString(), e);
}
}
}

View File

@ -32,38 +32,41 @@ public class UsageStatsExporter {
String portalMatomoID = "109";
String irusUKBaseURL = "https://irus.jisc.ac.uk/api/sushilite/v1_7/";
String irusUKReportPath = "/user/spyros/logs/usage_stats_logs/irusUKReports";
// connect to DB
ConnectDB.init(properties);
// Create DB tables - they are also needed to download the statistics too
PiwikStatsDB piwikstatsdb = new PiwikStatsDB(repoLogPath, portalLogPath);
// // Create DB tables - they are also needed to download the statistics too
// PiwikStatsDB piwikstatsdb = new PiwikStatsDB(repoLogPath, portalLogPath);
//
// // Download the statistics - The following 2 lines are not needed after the download - Commenting them out for
// // the moment
// PiwikDownloadLogs piwd = new PiwikDownloadLogs(matomoBaseURL, matomoAuthToken);
// piwd.GetOpenAIRELogs(repoLogPath, portalLogPath, portalMatomoID);
//
// System.exit(0);
//
// // Create DB tables, insert/update statistics
//// String cRobotsUrl = properties.getProperty("COUNTER_robots_Url");
// String cRobotsUrl = "https://raw.githubusercontent.com/atmire/COUNTER-Robots/master/COUNTER_Robots_list.json";
// piwikstatsdb.setCounterRobotsURL(cRobotsUrl);
// piwikstatsdb.processLogs();
// log.info("process logs done");
// Download the statistics - The following 2 lines are not needed after the download - Commenting them out for
// the moment
PiwikDownloadLogs piwd = new PiwikDownloadLogs(matomoBaseURL, matomoAuthToken);
piwd.GetOpenAIRELogs(repoLogPath, portalLogPath, portalMatomoID);
System.exit(0);
// Create DB tables, insert/update statistics
// String cRobotsUrl = properties.getProperty("COUNTER_robots_Url");
String cRobotsUrl = "https://raw.githubusercontent.com/atmire/COUNTER-Robots/master/COUNTER_Robots_list.json";
piwikstatsdb.setCounterRobotsURL(cRobotsUrl);
piwikstatsdb.processLogs();
log.info("process logs done");
IrusStats irusstats = new IrusStats(irusUKBaseURL);
irusstats.processIrusRRReport();
irusstats.irusStats();
log.info("irus done");
// IrusStats irusstats = new IrusStats(irusUKBaseURL);
// irusstats.processIrusRRReport(irusUKReportPath);
// irusstats.irusStats();
// log.info("irus done");
//
SarcStats sarcStats = new SarcStats();
sarcStats.processSarc();
sarcStats.sarcStats();
// sarcStats.sarcStats();
log.info("sarc done");
// finalize usagestats
piwikstatsdb.finalizeStats();
log.info("finalized stats");
// // finalize usagestats
// piwikstatsdb.finalizeStats();
// log.info("finalized stats");
}
}