forked from D-Net/dnet-hadoop
Changes to download Irus Stats
This commit is contained in:
parent
4c00343bbd
commit
c035fa7648
|
@ -20,6 +20,10 @@ import java.text.SimpleDateFormat;
|
|||
import java.util.Calendar;
|
||||
import java.util.Date;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.json.simple.JSONArray;
|
||||
import org.json.simple.JSONObject;
|
||||
|
@ -39,8 +43,8 @@ public class IrusStats {
|
|||
|
||||
public IrusStats(String irusUKURL) throws Exception {
|
||||
this.irusUKURL = irusUKURL;
|
||||
createTables();
|
||||
createTmpTables();
|
||||
// createTables();
|
||||
// createTmpTables();
|
||||
}
|
||||
|
||||
private void createTables() throws Exception {
|
||||
|
@ -120,7 +124,7 @@ public class IrusStats {
|
|||
ConnectDB.getConnection().close();
|
||||
}
|
||||
|
||||
public void processIrusRRReport() throws Exception {
|
||||
public void processIrusRRReport(String irusUKReportPath) throws Exception {
|
||||
SimpleDateFormat simpleDateFormat = new SimpleDateFormat("YYYY-MM");
|
||||
// String reportUrl = "https://irus.jisc.ac.uk" +
|
||||
// "/api/sushilite/v1_7/GetReport/?Report=RR1&Release=4&RequestorID=OpenAIRE&BeginDate=2016-01&EndDate=" +
|
||||
|
@ -153,7 +157,7 @@ public class IrusStats {
|
|||
// System.out.println(i + ": " + opendoar.get("Value").toString());
|
||||
log.info(i + ": " + opendoar.get("Value").toString());
|
||||
i++;
|
||||
processIrusIRReport(opendoar.get("Value").toString());
|
||||
processIrusIRReport(opendoar.get("Value").toString(), irusUKReportPath);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -161,7 +165,7 @@ public class IrusStats {
|
|||
}
|
||||
}
|
||||
|
||||
private void processIrusIRReport(String opendoar) throws Exception {
|
||||
private void processIrusIRReport(String opendoar, String irusUKReportPath) throws Exception {
|
||||
System.out.println(opendoar);
|
||||
ConnectDB.getConnection().setAutoCommit(false);
|
||||
|
||||
|
@ -178,7 +182,7 @@ public class IrusStats {
|
|||
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
|
||||
PreparedStatement st = ConnectDB
|
||||
.getConnection()
|
||||
.prepareStatement("SELECT max(date) FROM sushilog WHERE repository=?;");
|
||||
.prepareStatement("SELECT max(date) FROM usagestats_13.sushilog WHERE repository=?");
|
||||
st.setString(1, "opendoar____::" + opendoar);
|
||||
ResultSet rs_date = st.executeQuery();
|
||||
while (rs_date.next()) {
|
||||
|
@ -202,11 +206,18 @@ public class IrusStats {
|
|||
+ "&ItemIdentifier=&ItemDataType=&hasDOI=&Granularity=Monthly&Callback=";
|
||||
start.add(Calendar.MONTH, 1);
|
||||
|
||||
System.out.println("Downloading file: " + reportUrl);
|
||||
String text = getJson(reportUrl, "", "");
|
||||
if (text == null) {
|
||||
continue;
|
||||
}
|
||||
|
||||
FileSystem fs = FileSystem.get(new Configuration());
|
||||
String filePath = irusUKReportPath + "/" + "IrusIRReport_" +
|
||||
opendoar + "_" + simpleDateFormat.format(start.getTime()) + ".json";
|
||||
System.out.println("Storing to file: " + filePath);
|
||||
FSDataOutputStream fin = fs.create(new Path(filePath), true);
|
||||
|
||||
JSONParser parser = new JSONParser();
|
||||
JSONObject jsonObject = (JSONObject) parser.parse(text);
|
||||
jsonObject = (JSONObject) jsonObject.get("ReportResponse");
|
||||
|
@ -220,45 +231,64 @@ public class IrusStats {
|
|||
String oai = "";
|
||||
for (Object aJsonArray : jsonArray) {
|
||||
JSONObject jsonObjectRow = (JSONObject) aJsonArray;
|
||||
JSONArray itemIdentifier = (JSONArray) jsonObjectRow.get("ItemIdentifier");
|
||||
for (Object identifier : itemIdentifier) {
|
||||
JSONObject oaiPmh = (JSONObject) identifier;
|
||||
if (oaiPmh.get("Type").toString().equals("OAI")) {
|
||||
oai = oaiPmh.get("Value").toString();
|
||||
// System.out.println("OAI: " + oai);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
JSONArray itemPerformance = (JSONArray) jsonObjectRow.get("ItemPerformance");
|
||||
String period;
|
||||
String type;
|
||||
String count;
|
||||
for (Object perf : itemPerformance) {
|
||||
JSONObject performance = (JSONObject) perf;
|
||||
JSONObject periodObj = (JSONObject) performance.get("Period");
|
||||
period = periodObj.get("Begin").toString();
|
||||
JSONObject instanceObj = (JSONObject) performance.get("Instance");
|
||||
type = instanceObj.get("MetricType").toString();
|
||||
count = instanceObj.get("Count").toString();
|
||||
// System.out.println(oai + " : " + period + " : " + count);
|
||||
|
||||
preparedStatement.setString(1, "IRUS-UK");
|
||||
preparedStatement.setString(2, "opendoar____::" + opendoar);
|
||||
preparedStatement.setString(3, oai);
|
||||
preparedStatement.setString(4, period);
|
||||
preparedStatement.setString(5, type);
|
||||
preparedStatement.setInt(6, Integer.parseInt(count));
|
||||
preparedStatement.addBatch();
|
||||
batch_size++;
|
||||
if (batch_size == 10000) {
|
||||
preparedStatement.executeBatch();
|
||||
ConnectDB.getConnection().commit();
|
||||
batch_size = 0;
|
||||
}
|
||||
}
|
||||
// break;
|
||||
fin.write(jsonObjectRow.toJSONString().getBytes());
|
||||
fin.writeChar('\n');
|
||||
}
|
||||
|
||||
fin.close();
|
||||
|
||||
// JSONParser parser = new JSONParser();
|
||||
// JSONObject jsonObject = (JSONObject) parser.parse(text);
|
||||
// jsonObject = (JSONObject) jsonObject.get("ReportResponse");
|
||||
// jsonObject = (JSONObject) jsonObject.get("Report");
|
||||
// jsonObject = (JSONObject) jsonObject.get("Report");
|
||||
// jsonObject = (JSONObject) jsonObject.get("Customer");
|
||||
// JSONArray jsonArray = (JSONArray) jsonObject.get("ReportItems");
|
||||
// if (jsonArray == null) {
|
||||
// continue;
|
||||
// }
|
||||
// String oai = "";
|
||||
// for (Object aJsonArray : jsonArray) {
|
||||
// JSONObject jsonObjectRow = (JSONObject) aJsonArray;
|
||||
// JSONArray itemIdentifier = (JSONArray) jsonObjectRow.get("ItemIdentifier");
|
||||
// for (Object identifier : itemIdentifier) {
|
||||
// JSONObject oaiPmh = (JSONObject) identifier;
|
||||
// if (oaiPmh.get("Type").toString().equals("OAI")) {
|
||||
// oai = oaiPmh.get("Value").toString();
|
||||
// // System.out.println("OAI: " + oai);
|
||||
// break;
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// JSONArray itemPerformance = (JSONArray) jsonObjectRow.get("ItemPerformance");
|
||||
// String period;
|
||||
// String type;
|
||||
// String count;
|
||||
// for (Object perf : itemPerformance) {
|
||||
// JSONObject performance = (JSONObject) perf;
|
||||
// JSONObject periodObj = (JSONObject) performance.get("Period");
|
||||
// period = periodObj.get("Begin").toString();
|
||||
// JSONObject instanceObj = (JSONObject) performance.get("Instance");
|
||||
// type = instanceObj.get("MetricType").toString();
|
||||
// count = instanceObj.get("Count").toString();
|
||||
// // System.out.println(oai + " : " + period + " : " + count);
|
||||
//
|
||||
// preparedStatement.setString(1, "IRUS-UK");
|
||||
// preparedStatement.setString(2, "opendoar____::" + opendoar);
|
||||
// preparedStatement.setString(3, oai);
|
||||
// preparedStatement.setString(4, period);
|
||||
// preparedStatement.setString(5, type);
|
||||
// preparedStatement.setInt(6, Integer.parseInt(count));
|
||||
// preparedStatement.addBatch();
|
||||
// batch_size++;
|
||||
// if (batch_size == 10000) {
|
||||
// preparedStatement.executeBatch();
|
||||
// ConnectDB.getConnection().commit();
|
||||
// batch_size = 0;
|
||||
// }
|
||||
// }
|
||||
// // break;
|
||||
// }
|
||||
// break;
|
||||
}
|
||||
|
||||
|
@ -267,141 +297,32 @@ public class IrusStats {
|
|||
ConnectDB.getConnection().close();
|
||||
}
|
||||
|
||||
public void processIrusIRReport(String opendoar, String startDate) throws Exception {
|
||||
ConnectDB.getConnection().setAutoCommit(false);
|
||||
private String getJson(String url) throws Exception {
|
||||
try {
|
||||
System.out.println("===> Connecting to: " + url);
|
||||
URL website = new URL(url);
|
||||
System.out.println("Connection url -----> " + url);
|
||||
URLConnection connection = website.openConnection();
|
||||
|
||||
SimpleDateFormat simpleDateFormat = new SimpleDateFormat("YYYY-MM");
|
||||
|
||||
Calendar start = Calendar.getInstance();
|
||||
start.set(Calendar.YEAR, 2016);
|
||||
start.set(Calendar.MONTH, Calendar.JANUARY);
|
||||
// start.setTime(simpleDateFormat.parse("2016-01"));
|
||||
|
||||
Calendar end = Calendar.getInstance();
|
||||
end.add(Calendar.DAY_OF_MONTH, -1);
|
||||
|
||||
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
|
||||
start.setTime(sdf.parse(startDate));
|
||||
|
||||
String createTablesQuery = "-- Table: shadow.sushilog" + opendoar + "\n"
|
||||
+ "\n"
|
||||
+ "-- DROP TABLE shadow.sushilog" + opendoar + ";\n"
|
||||
+ "\n"
|
||||
+ "CREATE TABLE shadow.sushilog" + opendoar + "\n"
|
||||
+ "(\n"
|
||||
+ " source text COLLATE pg_catalog.\"default\" NOT NULL,\n"
|
||||
+ " repository text COLLATE pg_catalog.\"default\" NOT NULL,\n"
|
||||
+ " rid text COLLATE pg_catalog.\"default\" NOT NULL,\n"
|
||||
+ " date text COLLATE pg_catalog.\"default\" NOT NULL,\n"
|
||||
+ " metric_type text COLLATE pg_catalog.\"default\" NOT NULL,\n"
|
||||
+ " count integer,\n"
|
||||
+ " CONSTRAINT sushilog" + opendoar + "_pkey PRIMARY KEY (source, repository, rid, date, metric_type)\n"
|
||||
+ " USING INDEX TABLESPACE index_storage\n"
|
||||
+ ")\n"
|
||||
+ "\n"
|
||||
+ "TABLESPACE pg_default;\n"
|
||||
+ "\n"
|
||||
+ "ALTER TABLE shadow.sushilog" + opendoar + "\n"
|
||||
+ " OWNER to sqoop;\n"
|
||||
+ "\n"
|
||||
+ "-- Rule: ignore_duplicate_inserts ON shadow.sushilog" + opendoar + "\n"
|
||||
+ "\n"
|
||||
+ "-- DROP Rule ignore_duplicate_inserts ON shadow.sushilog" + opendoar + ";\n"
|
||||
+ "\n"
|
||||
+ "CREATE OR REPLACE RULE ignore_duplicate_inserts AS\n"
|
||||
+ " ON INSERT TO shadow.sushilog" + opendoar + "\n"
|
||||
+ " WHERE (EXISTS ( SELECT sushilog" + opendoar + ".source,\n"
|
||||
+ " sushilog" + opendoar + ".repository,\n"
|
||||
+ " sushilog" + opendoar + ".rid,\n"
|
||||
+ " sushilog" + opendoar + ".date\n"
|
||||
+ " FROM sushilog" + opendoar + "\n"
|
||||
+ " WHERE sushilog" + opendoar + ".source = new.source AND sushilog" + opendoar
|
||||
+ ".repository = new.repository AND sushilog" + opendoar + ".rid = new.rid AND sushilog" + opendoar
|
||||
+ ".date = new.date AND sushilog" + opendoar + ".metric_type = new.metric_type))\n"
|
||||
+ " DO INSTEAD\n"
|
||||
+ "NOTHING;";
|
||||
|
||||
Statement stCreateTables = ConnectDB.getConnection().createStatement();
|
||||
stCreateTables.execute(createTablesQuery);
|
||||
ConnectDB.getConnection().commit();
|
||||
|
||||
PreparedStatement preparedStatement = ConnectDB
|
||||
.getConnection()
|
||||
.prepareStatement(
|
||||
"INSERT INTO sushilog" + opendoar
|
||||
+ " (source, repository, rid, date, metric_type, count) VALUES (?,?,?,?,?,?)");
|
||||
int batch_size = 0;
|
||||
|
||||
while (start.before(end)) {
|
||||
// log.info("date: " + simpleDateFormat.format(start.getTime()));
|
||||
String reportUrl = "https://irus.jisc.ac.uk/api/sushilite/v1_7/GetReport/?Report=IR1&Release=4&RequestorID=OpenAIRE&BeginDate="
|
||||
+ simpleDateFormat.format(start.getTime()) + "&EndDate=2019-10-31&RepositoryIdentifier=opendoar%3A"
|
||||
+ opendoar + "&ItemIdentifier=&ItemDataType=&hasDOI=&Granularity=Monthly&Callback=";
|
||||
start.add(Calendar.MONTH, 1);
|
||||
|
||||
String text = getJson(reportUrl, "", "");
|
||||
if (text == null) {
|
||||
continue;
|
||||
}
|
||||
|
||||
JSONParser parser = new JSONParser();
|
||||
JSONObject jsonObject = (JSONObject) parser.parse(text);
|
||||
jsonObject = (JSONObject) jsonObject.get("ReportResponse");
|
||||
jsonObject = (JSONObject) jsonObject.get("Report");
|
||||
jsonObject = (JSONObject) jsonObject.get("Report");
|
||||
jsonObject = (JSONObject) jsonObject.get("Customer");
|
||||
JSONArray jsonArray = (JSONArray) jsonObject.get("ReportItems");
|
||||
if (jsonArray == null) {
|
||||
continue;
|
||||
}
|
||||
String oai = "";
|
||||
for (Object aJsonArray : jsonArray) {
|
||||
JSONObject jsonObjectRow = (JSONObject) aJsonArray;
|
||||
JSONArray itemIdentifier = (JSONArray) jsonObjectRow.get("ItemIdentifier");
|
||||
for (Object identifier : itemIdentifier) {
|
||||
JSONObject oaiPmh = (JSONObject) identifier;
|
||||
if (oaiPmh.get("Type").toString().equals("OAI")) {
|
||||
oai = oaiPmh.get("Value").toString();
|
||||
// System.out.println("OAI: " + oai);
|
||||
break;
|
||||
}
|
||||
// connection.setRequestProperty ("Authorization", "Basic "+encoded);
|
||||
StringBuilder response;
|
||||
try (BufferedReader in = new BufferedReader(new InputStreamReader(connection.getInputStream()))) {
|
||||
response = new StringBuilder();
|
||||
String inputLine;
|
||||
while ((inputLine = in.readLine()) != null) {
|
||||
response.append(inputLine);
|
||||
// response.append("\n");
|
||||
}
|
||||
|
||||
JSONArray itemPerformance = (JSONArray) jsonObjectRow.get("ItemPerformance");
|
||||
String period;
|
||||
String type;
|
||||
String count;
|
||||
for (Object perf : itemPerformance) {
|
||||
JSONObject performance = (JSONObject) perf;
|
||||
JSONObject periodObj = (JSONObject) performance.get("Period");
|
||||
period = periodObj.get("Begin").toString();
|
||||
JSONObject instanceObj = (JSONObject) performance.get("Instance");
|
||||
type = instanceObj.get("MetricType").toString();
|
||||
count = instanceObj.get("Count").toString();
|
||||
// System.out.println(oai + " : " + period + " : " + count);
|
||||
|
||||
preparedStatement.setString(1, "IRUS-UK");
|
||||
preparedStatement.setString(2, "opendoar____::" + opendoar);
|
||||
preparedStatement.setString(3, oai);
|
||||
preparedStatement.setString(4, period);
|
||||
preparedStatement.setString(5, type);
|
||||
preparedStatement.setInt(6, Integer.parseInt(count));
|
||||
preparedStatement.addBatch();
|
||||
batch_size++;
|
||||
if (batch_size == 10000) {
|
||||
preparedStatement.executeBatch();
|
||||
ConnectDB.getConnection().commit();
|
||||
batch_size = 0;
|
||||
}
|
||||
}
|
||||
// break;
|
||||
}
|
||||
// break;
|
||||
|
||||
System.out.println("response ====> " + response.toString());
|
||||
|
||||
return response.toString();
|
||||
} catch (Exception e) {
|
||||
log.error("Failed to get URL: " + e);
|
||||
System.out.println("Failed to get URL: " + e);
|
||||
throw new Exception("Failed to get URL: " + e.toString(), e);
|
||||
}
|
||||
|
||||
preparedStatement.executeBatch();
|
||||
ConnectDB.getConnection().commit();
|
||||
ConnectDB.getConnection().close();
|
||||
}
|
||||
|
||||
private String getJson(String url, String username, String password) throws Exception {
|
||||
|
|
|
@ -27,7 +27,7 @@ public class SarcStats {
|
|||
private final Logger log = Logger.getLogger(this.getClass());
|
||||
|
||||
public SarcStats() throws Exception {
|
||||
createTables();
|
||||
// createTables();
|
||||
}
|
||||
|
||||
private void createTables() throws Exception {
|
||||
|
@ -228,7 +228,7 @@ public class SarcStats {
|
|||
ConnectDB.getConnection().close();
|
||||
}
|
||||
|
||||
private String getJson(String url) {
|
||||
private String getJson(String url) throws Exception {
|
||||
// String cred=username+":"+password;
|
||||
// String encoded = new sun.misc.BASE64Encoder().encode (cred.getBytes());
|
||||
try {
|
||||
|
@ -246,10 +246,11 @@ public class SarcStats {
|
|||
}
|
||||
return response.toString();
|
||||
} catch (Exception e) {
|
||||
|
||||
log.error("Failed to get URL: " + e);
|
||||
// System.out.println("Failed to get URL: " + e);
|
||||
return null;
|
||||
// throw new Exception("Failed to get URL: " + e.toString(), e);
|
||||
System.out.println("Failed to get URL: " + e);
|
||||
// return null;
|
||||
throw new Exception("Failed to get URL: " + e.toString(), e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -32,38 +32,41 @@ public class UsageStatsExporter {
|
|||
String portalMatomoID = "109";
|
||||
String irusUKBaseURL = "https://irus.jisc.ac.uk/api/sushilite/v1_7/";
|
||||
|
||||
String irusUKReportPath = "/user/spyros/logs/usage_stats_logs/irusUKReports";
|
||||
|
||||
// connect to DB
|
||||
ConnectDB.init(properties);
|
||||
|
||||
// Create DB tables - they are also needed to download the statistics too
|
||||
PiwikStatsDB piwikstatsdb = new PiwikStatsDB(repoLogPath, portalLogPath);
|
||||
// // Create DB tables - they are also needed to download the statistics too
|
||||
// PiwikStatsDB piwikstatsdb = new PiwikStatsDB(repoLogPath, portalLogPath);
|
||||
//
|
||||
// // Download the statistics - The following 2 lines are not needed after the download - Commenting them out for
|
||||
// // the moment
|
||||
// PiwikDownloadLogs piwd = new PiwikDownloadLogs(matomoBaseURL, matomoAuthToken);
|
||||
// piwd.GetOpenAIRELogs(repoLogPath, portalLogPath, portalMatomoID);
|
||||
//
|
||||
// System.exit(0);
|
||||
//
|
||||
// // Create DB tables, insert/update statistics
|
||||
//// String cRobotsUrl = properties.getProperty("COUNTER_robots_Url");
|
||||
// String cRobotsUrl = "https://raw.githubusercontent.com/atmire/COUNTER-Robots/master/COUNTER_Robots_list.json";
|
||||
// piwikstatsdb.setCounterRobotsURL(cRobotsUrl);
|
||||
// piwikstatsdb.processLogs();
|
||||
// log.info("process logs done");
|
||||
|
||||
// Download the statistics - The following 2 lines are not needed after the download - Commenting them out for
|
||||
// the moment
|
||||
PiwikDownloadLogs piwd = new PiwikDownloadLogs(matomoBaseURL, matomoAuthToken);
|
||||
piwd.GetOpenAIRELogs(repoLogPath, portalLogPath, portalMatomoID);
|
||||
|
||||
System.exit(0);
|
||||
|
||||
// Create DB tables, insert/update statistics
|
||||
// String cRobotsUrl = properties.getProperty("COUNTER_robots_Url");
|
||||
String cRobotsUrl = "https://raw.githubusercontent.com/atmire/COUNTER-Robots/master/COUNTER_Robots_list.json";
|
||||
piwikstatsdb.setCounterRobotsURL(cRobotsUrl);
|
||||
piwikstatsdb.processLogs();
|
||||
log.info("process logs done");
|
||||
|
||||
IrusStats irusstats = new IrusStats(irusUKBaseURL);
|
||||
irusstats.processIrusRRReport();
|
||||
irusstats.irusStats();
|
||||
log.info("irus done");
|
||||
// IrusStats irusstats = new IrusStats(irusUKBaseURL);
|
||||
// irusstats.processIrusRRReport(irusUKReportPath);
|
||||
|
||||
// irusstats.irusStats();
|
||||
// log.info("irus done");
|
||||
//
|
||||
SarcStats sarcStats = new SarcStats();
|
||||
sarcStats.processSarc();
|
||||
sarcStats.sarcStats();
|
||||
// sarcStats.sarcStats();
|
||||
log.info("sarc done");
|
||||
|
||||
// finalize usagestats
|
||||
piwikstatsdb.finalizeStats();
|
||||
log.info("finalized stats");
|
||||
// // finalize usagestats
|
||||
// piwikstatsdb.finalizeStats();
|
||||
// log.info("finalized stats");
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue