Changes to download Sarc stats

This commit is contained in:
Spyros Zoupanos 2020-07-25 13:17:47 +03:00
parent c035fa7648
commit 8db9a7ccdc
2 changed files with 87 additions and 68 deletions

View File

@ -12,6 +12,10 @@ import java.sql.Statement;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.log4j.Logger;
import org.json.simple.JSONArray;
import org.json.simple.JSONObject;
@ -58,24 +62,24 @@ public class SarcStats {
}
}
public void processSarc() throws Exception {
processARReport("https://revistas.rcaap.pt/motricidade/sushiLite/v1_7/", "1646-107X");
processARReport("https://revistas.rcaap.pt/antropologicas/sushiLite/v1_7/", "0873-819X");
processARReport("https://revistas.rcaap.pt/interaccoes/sushiLite/v1_7/", "1646-2335");
processARReport("https://revistas.rcaap.pt/cct/sushiLite/v1_7/", "2182-3030");
processARReport("https://actapediatrica.spp.pt/sushiLite/v1_7/", "0873-9781");
processARReport("https://revistas.rcaap.pt/sociologiapp/sushiLite/v1_7/", "0873-6529");
processARReport("https://revistas.rcaap.pt/finisterra/sushiLite/v1_7/", "0430-5027");
processARReport("https://revistas.rcaap.pt/sisyphus/sushiLite/v1_7/", "2182-8474");
processARReport("https://revistas.rcaap.pt/anestesiologia/sushiLite/v1_7/", "0871-6099");
processARReport("https://revistas.rcaap.pt/rpe/sushiLite/v1_7/", "0871-9187");
processARReport("https://revistas.rcaap.pt/psilogos/sushiLite/v1_7/", "1646-091X");
processARReport("https://revistas.rcaap.pt/juridica/sushiLite/v1_7/", "2183-5799");
processARReport("https://revistas.rcaap.pt/ecr/sushiLite/v1_7/", "1647-2098");
processARReport("https://revistas.rcaap.pt/nascercrescer/sushiLite/v1_7/", "0872-0754");
processARReport("https://revistas.rcaap.pt/cea/sushiLite/v1_7/", "1645-3794");
processARReport("https://revistas.rcaap.pt/proelium/sushiLite/v1_7/", "1645-8826");
processARReport("https://revistas.rcaap.pt/millenium/sushiLite/v1_7/", "0873-3015");
/**
 * Runs {@code processARReport} once for every SARC journal endpoint listed below.
 *
 * The endpoints are visited strictly in the order they appear in the table. The
 * first exception raised by {@code processARReport} propagates to the caller, so
 * the remaining endpoints are not visited in that case.
 *
 * @param sarcsReportPath value handed unchanged to every {@code processARReport}
 *                        call (presumably the base path under which the report
 *                        files are stored - confirm against processARReport)
 * @throws Exception whatever {@code processARReport} throws
 */
public void processSarc(String sarcsReportPath) throws Exception {
    // One row per journal: { SUSHI-Lite base URL, ISSN }.
    final String[][] journalEndpoints = {
        { "https://revistas.rcaap.pt/motricidade/sushiLite/v1_7/", "1646-107X" },
        { "https://revistas.rcaap.pt/antropologicas/sushiLite/v1_7/", "0873-819X" },
        { "https://revistas.rcaap.pt/interaccoes/sushiLite/v1_7/", "1646-2335" },
        { "https://revistas.rcaap.pt/cct/sushiLite/v1_7/", "2182-3030" },
        { "https://actapediatrica.spp.pt/sushiLite/v1_7/", "0873-9781" },
        { "https://revistas.rcaap.pt/sociologiapp/sushiLite/v1_7/", "0873-6529" },
        { "https://revistas.rcaap.pt/finisterra/sushiLite/v1_7/", "0430-5027" },
        { "https://revistas.rcaap.pt/sisyphus/sushiLite/v1_7/", "2182-8474" },
        { "https://revistas.rcaap.pt/anestesiologia/sushiLite/v1_7/", "0871-6099" },
        { "https://revistas.rcaap.pt/rpe/sushiLite/v1_7/", "0871-9187" },
        { "https://revistas.rcaap.pt/psilogos/sushiLite/v1_7/", "1646-091X" },
        { "https://revistas.rcaap.pt/juridica/sushiLite/v1_7/", "2183-5799" },
        { "https://revistas.rcaap.pt/ecr/sushiLite/v1_7/", "1647-2098" },
        { "https://revistas.rcaap.pt/nascercrescer/sushiLite/v1_7/", "0872-0754" },
        { "https://revistas.rcaap.pt/cea/sushiLite/v1_7/", "1645-3794" },
        { "https://revistas.rcaap.pt/proelium/sushiLite/v1_7/", "1645-8826" },
        { "https://revistas.rcaap.pt/millenium/sushiLite/v1_7/", "0873-3015" },
    };

    for (String[] endpoint : journalEndpoints) {
        final String baseUrl = endpoint[0];
        final String journalIssn = endpoint[1];
        processARReport(sarcsReportPath, baseUrl, journalIssn);
    }
}
public void sarcStats() throws Exception {
@ -94,7 +98,7 @@ public class SarcStats {
ConnectDB.getConnection().close();
}
public void processARReport(String url, String issn) throws Exception {
public void processARReport(String sarcsReportPath, String url, String issn) throws Exception {
log.info("Processing SARC! issn: " + issn + " with url: " + url);
ConnectDB.getConnection().setAutoCommit(false);
@ -111,7 +115,7 @@ public class SarcStats {
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
PreparedStatement st = ConnectDB
.getConnection()
.prepareStatement("SELECT max(date) FROM sushilog WHERE repository=?;");
.prepareStatement("SELECT max(date) FROM usagestats_13.sushilog WHERE repository=?");
st.setString(1, issn);
ResultSet rs_date = st.executeQuery();
while (rs_date.next()) {
@ -169,57 +173,71 @@ public class SarcStats {
continue;
}
// Creating the file in the filesystem
FileSystem fs = FileSystem.get(new Configuration());
String filePath = sarcsReportPath + "/" + "SarcsARReport_" +
simpleDateFormat.format(start.getTime()) + ".json";
System.out.println("Storing to file: " + filePath);
FSDataOutputStream fin = fs.create(new Path(filePath), true);
String rid = "";
for (Object aJsonArray : jsonArray) {
JSONObject jsonObjectRow = (JSONObject) aJsonArray;
JSONArray itemIdentifier = new JSONArray();
obj = jsonObjectRow.get("c:ItemIdentifier");
if (obj instanceof JSONObject) {
itemIdentifier.add(obj);
} else {
// JSONArray itemIdentifier = (JSONArray) jsonObjectRow.get("c:ItemIdentifier");
itemIdentifier = (JSONArray) obj;
}
for (Object identifier : itemIdentifier) {
JSONObject doi = (JSONObject) identifier;
if (doi.get("c:Type").toString().equals("DOI")) {
rid = doi.get("c:Value").toString();
// System.out.println("DOI: " + rid);
break;
}
}
if (rid.isEmpty()) {
continue;
}
JSONObject itemPerformance = (JSONObject) jsonObjectRow.get("c:ItemPerformance");
// for (Object perf : itemPerformance) {
JSONObject performance = (JSONObject) itemPerformance;
JSONObject periodObj = (JSONObject) performance.get("c:Period");
String period = periodObj.get("c:Begin").toString();
JSONObject instanceObj = (JSONObject) performance.get("c:Instance");
String type = instanceObj.get("c:MetricType").toString();
String count = instanceObj.get("c:Count").toString();
// System.out.println(rid + " : " + period + " : " + count);
preparedStatement.setString(1, "SARC-OJS");
preparedStatement.setString(2, issn);
// preparedStatement.setString(2, url);
preparedStatement.setString(3, rid);
preparedStatement.setString(4, period);
preparedStatement.setString(5, type);
preparedStatement.setInt(6, Integer.parseInt(count));
preparedStatement.addBatch();
batch_size++;
if (batch_size == 10000) {
preparedStatement.executeBatch();
ConnectDB.getConnection().commit();
batch_size = 0;
}
// }
// break;
// Store one JSON document per line, encoded explicitly as UTF-8 so the output
// does not depend on the JVM's default charset.
fin.write(jsonObjectRow.toJSONString().getBytes("UTF-8"));
// write(int) emits exactly one byte. DataOutputStream.writeChar would emit the
// newline as a two-byte char (0x00 0x0A), leaving a NUL byte at the end of
// every line of the report file.
fin.write('\n');
}
fin.close();
// JSONObject jsonObjectRow = (JSONObject) aJsonArray;
// JSONArray itemIdentifier = new JSONArray();
// obj = jsonObjectRow.get("c:ItemIdentifier");
// if (obj instanceof JSONObject) {
// itemIdentifier.add(obj);
// } else {
// // JSONArray itemIdentifier = (JSONArray) jsonObjectRow.get("c:ItemIdentifier");
// itemIdentifier = (JSONArray) obj;
// }
// for (Object identifier : itemIdentifier) {
// JSONObject doi = (JSONObject) identifier;
// if (doi.get("c:Type").toString().equals("DOI")) {
// rid = doi.get("c:Value").toString();
// // System.out.println("DOI: " + rid);
// break;
// }
// }
// if (rid.isEmpty()) {
// continue;
// }
//
// JSONObject itemPerformance = (JSONObject) jsonObjectRow.get("c:ItemPerformance");
// // for (Object perf : itemPerformance) {
// JSONObject performance = (JSONObject) itemPerformance;
// JSONObject periodObj = (JSONObject) performance.get("c:Period");
// String period = periodObj.get("c:Begin").toString();
// JSONObject instanceObj = (JSONObject) performance.get("c:Instance");
// String type = instanceObj.get("c:MetricType").toString();
// String count = instanceObj.get("c:Count").toString();
// // System.out.println(rid + " : " + period + " : " + count);
//
// preparedStatement.setString(1, "SARC-OJS");
// preparedStatement.setString(2, issn);
// // preparedStatement.setString(2, url);
// preparedStatement.setString(3, rid);
// preparedStatement.setString(4, period);
// preparedStatement.setString(5, type);
// preparedStatement.setInt(6, Integer.parseInt(count));
// preparedStatement.addBatch();
// batch_size++;
// if (batch_size == 10000) {
// preparedStatement.executeBatch();
// ConnectDB.getConnection().commit();
// batch_size = 0;
// }
// // }
//
// // break;
// }
// break;
}

View File

@ -33,6 +33,7 @@ public class UsageStatsExporter {
String irusUKBaseURL = "https://irus.jisc.ac.uk/api/sushilite/v1_7/";
String irusUKReportPath = "/user/spyros/logs/usage_stats_logs/irusUKReports";
String sarcsReportPath = "/user/spyros/logs/usage_stats_logs/sarcReports";
// connect to DB
ConnectDB.init(properties);
@ -61,7 +62,7 @@ public class UsageStatsExporter {
// log.info("irus done");
//
SarcStats sarcStats = new SarcStats();
sarcStats.processSarc();
sarcStats.processSarc(sarcsReportPath);
// sarcStats.sarcStats();
log.info("sarc done");