forked from D-Net/dnet-hadoop
Changes to download Sarc stats
This commit is contained in:
parent
c035fa7648
commit
8db9a7ccdc
|
@ -12,6 +12,10 @@ import java.sql.Statement;
|
|||
import java.text.SimpleDateFormat;
|
||||
import java.util.Calendar;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.json.simple.JSONArray;
|
||||
import org.json.simple.JSONObject;
|
||||
|
@ -58,24 +62,24 @@ public class SarcStats {
|
|||
}
|
||||
}
|
||||
|
||||
public void processSarc() throws Exception {
|
||||
processARReport("https://revistas.rcaap.pt/motricidade/sushiLite/v1_7/", "1646-107X");
|
||||
processARReport("https://revistas.rcaap.pt/antropologicas/sushiLite/v1_7/", "0873-819X");
|
||||
processARReport("https://revistas.rcaap.pt/interaccoes/sushiLite/v1_7/", "1646-2335");
|
||||
processARReport("https://revistas.rcaap.pt/cct/sushiLite/v1_7/", "2182-3030");
|
||||
processARReport("https://actapediatrica.spp.pt/sushiLite/v1_7/", "0873-9781");
|
||||
processARReport("https://revistas.rcaap.pt/sociologiapp/sushiLite/v1_7/", "0873-6529");
|
||||
processARReport("https://revistas.rcaap.pt/finisterra/sushiLite/v1_7/", "0430-5027");
|
||||
processARReport("https://revistas.rcaap.pt/sisyphus/sushiLite/v1_7/", "2182-8474");
|
||||
processARReport("https://revistas.rcaap.pt/anestesiologia/sushiLite/v1_7/", "0871-6099");
|
||||
processARReport("https://revistas.rcaap.pt/rpe/sushiLite/v1_7/", "0871-9187");
|
||||
processARReport("https://revistas.rcaap.pt/psilogos/sushiLite/v1_7/", "1646-091X");
|
||||
processARReport("https://revistas.rcaap.pt/juridica/sushiLite/v1_7/", "2183-5799");
|
||||
processARReport("https://revistas.rcaap.pt/ecr/sushiLite/v1_7/", "1647-2098");
|
||||
processARReport("https://revistas.rcaap.pt/nascercrescer/sushiLite/v1_7/", "0872-0754");
|
||||
processARReport("https://revistas.rcaap.pt/cea/sushiLite/v1_7/", "1645-3794");
|
||||
processARReport("https://revistas.rcaap.pt/proelium/sushiLite/v1_7/", "1645-8826");
|
||||
processARReport("https://revistas.rcaap.pt/millenium/sushiLite/v1_7/", "0873-3015");
|
||||
public void processSarc(String sarcsReportPath) throws Exception {
|
||||
processARReport(sarcsReportPath, "https://revistas.rcaap.pt/motricidade/sushiLite/v1_7/", "1646-107X");
|
||||
processARReport(sarcsReportPath, "https://revistas.rcaap.pt/antropologicas/sushiLite/v1_7/", "0873-819X");
|
||||
processARReport(sarcsReportPath, "https://revistas.rcaap.pt/interaccoes/sushiLite/v1_7/", "1646-2335");
|
||||
processARReport(sarcsReportPath, "https://revistas.rcaap.pt/cct/sushiLite/v1_7/", "2182-3030");
|
||||
processARReport(sarcsReportPath, "https://actapediatrica.spp.pt/sushiLite/v1_7/", "0873-9781");
|
||||
processARReport(sarcsReportPath, "https://revistas.rcaap.pt/sociologiapp/sushiLite/v1_7/", "0873-6529");
|
||||
processARReport(sarcsReportPath, "https://revistas.rcaap.pt/finisterra/sushiLite/v1_7/", "0430-5027");
|
||||
processARReport(sarcsReportPath, "https://revistas.rcaap.pt/sisyphus/sushiLite/v1_7/", "2182-8474");
|
||||
processARReport(sarcsReportPath, "https://revistas.rcaap.pt/anestesiologia/sushiLite/v1_7/", "0871-6099");
|
||||
processARReport(sarcsReportPath, "https://revistas.rcaap.pt/rpe/sushiLite/v1_7/", "0871-9187");
|
||||
processARReport(sarcsReportPath, "https://revistas.rcaap.pt/psilogos/sushiLite/v1_7/", "1646-091X");
|
||||
processARReport(sarcsReportPath, "https://revistas.rcaap.pt/juridica/sushiLite/v1_7/", "2183-5799");
|
||||
processARReport(sarcsReportPath, "https://revistas.rcaap.pt/ecr/sushiLite/v1_7/", "1647-2098");
|
||||
processARReport(sarcsReportPath, "https://revistas.rcaap.pt/nascercrescer/sushiLite/v1_7/", "0872-0754");
|
||||
processARReport(sarcsReportPath, "https://revistas.rcaap.pt/cea/sushiLite/v1_7/", "1645-3794");
|
||||
processARReport(sarcsReportPath, "https://revistas.rcaap.pt/proelium/sushiLite/v1_7/", "1645-8826");
|
||||
processARReport(sarcsReportPath, "https://revistas.rcaap.pt/millenium/sushiLite/v1_7/", "0873-3015");
|
||||
}
|
||||
|
||||
public void sarcStats() throws Exception {
|
||||
|
@ -94,7 +98,7 @@ public class SarcStats {
|
|||
ConnectDB.getConnection().close();
|
||||
}
|
||||
|
||||
public void processARReport(String url, String issn) throws Exception {
|
||||
public void processARReport(String sarcsReportPath, String url, String issn) throws Exception {
|
||||
log.info("Processing SARC! issn: " + issn + " with url: " + url);
|
||||
ConnectDB.getConnection().setAutoCommit(false);
|
||||
|
||||
|
@ -111,7 +115,7 @@ public class SarcStats {
|
|||
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
|
||||
PreparedStatement st = ConnectDB
|
||||
.getConnection()
|
||||
.prepareStatement("SELECT max(date) FROM sushilog WHERE repository=?;");
|
||||
.prepareStatement("SELECT max(date) FROM usagestats_13.sushilog WHERE repository=?");
|
||||
st.setString(1, issn);
|
||||
ResultSet rs_date = st.executeQuery();
|
||||
while (rs_date.next()) {
|
||||
|
@ -169,57 +173,71 @@ public class SarcStats {
|
|||
continue;
|
||||
}
|
||||
|
||||
// Creating the file in the filesystem
|
||||
FileSystem fs = FileSystem.get(new Configuration());
|
||||
String filePath = sarcsReportPath + "/" + "SarcsARReport_" +
|
||||
simpleDateFormat.format(start.getTime()) + ".json";
|
||||
System.out.println("Storing to file: " + filePath);
|
||||
FSDataOutputStream fin = fs.create(new Path(filePath), true);
|
||||
|
||||
String rid = "";
|
||||
for (Object aJsonArray : jsonArray) {
|
||||
|
||||
JSONObject jsonObjectRow = (JSONObject) aJsonArray;
|
||||
JSONArray itemIdentifier = new JSONArray();
|
||||
obj = jsonObjectRow.get("c:ItemIdentifier");
|
||||
if (obj instanceof JSONObject) {
|
||||
itemIdentifier.add(obj);
|
||||
} else {
|
||||
// JSONArray itemIdentifier = (JSONArray) jsonObjectRow.get("c:ItemIdentifier");
|
||||
itemIdentifier = (JSONArray) obj;
|
||||
}
|
||||
for (Object identifier : itemIdentifier) {
|
||||
JSONObject doi = (JSONObject) identifier;
|
||||
if (doi.get("c:Type").toString().equals("DOI")) {
|
||||
rid = doi.get("c:Value").toString();
|
||||
// System.out.println("DOI: " + rid);
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (rid.isEmpty()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
JSONObject itemPerformance = (JSONObject) jsonObjectRow.get("c:ItemPerformance");
|
||||
// for (Object perf : itemPerformance) {
|
||||
JSONObject performance = (JSONObject) itemPerformance;
|
||||
JSONObject periodObj = (JSONObject) performance.get("c:Period");
|
||||
String period = periodObj.get("c:Begin").toString();
|
||||
JSONObject instanceObj = (JSONObject) performance.get("c:Instance");
|
||||
String type = instanceObj.get("c:MetricType").toString();
|
||||
String count = instanceObj.get("c:Count").toString();
|
||||
// System.out.println(rid + " : " + period + " : " + count);
|
||||
|
||||
preparedStatement.setString(1, "SARC-OJS");
|
||||
preparedStatement.setString(2, issn);
|
||||
// preparedStatement.setString(2, url);
|
||||
preparedStatement.setString(3, rid);
|
||||
preparedStatement.setString(4, period);
|
||||
preparedStatement.setString(5, type);
|
||||
preparedStatement.setInt(6, Integer.parseInt(count));
|
||||
preparedStatement.addBatch();
|
||||
batch_size++;
|
||||
if (batch_size == 10000) {
|
||||
preparedStatement.executeBatch();
|
||||
ConnectDB.getConnection().commit();
|
||||
batch_size = 0;
|
||||
}
|
||||
// }
|
||||
|
||||
// break;
|
||||
fin.write(jsonObjectRow.toJSONString().getBytes());
|
||||
fin.writeChar('\n');
|
||||
}
|
||||
fin.close();
|
||||
|
||||
// JSONObject jsonObjectRow = (JSONObject) aJsonArray;
|
||||
// JSONArray itemIdentifier = new JSONArray();
|
||||
// obj = jsonObjectRow.get("c:ItemIdentifier");
|
||||
// if (obj instanceof JSONObject) {
|
||||
// itemIdentifier.add(obj);
|
||||
// } else {
|
||||
// // JSONArray itemIdentifier = (JSONArray) jsonObjectRow.get("c:ItemIdentifier");
|
||||
// itemIdentifier = (JSONArray) obj;
|
||||
// }
|
||||
// for (Object identifier : itemIdentifier) {
|
||||
// JSONObject doi = (JSONObject) identifier;
|
||||
// if (doi.get("c:Type").toString().equals("DOI")) {
|
||||
// rid = doi.get("c:Value").toString();
|
||||
// // System.out.println("DOI: " + rid);
|
||||
// break;
|
||||
// }
|
||||
// }
|
||||
// if (rid.isEmpty()) {
|
||||
// continue;
|
||||
// }
|
||||
//
|
||||
// JSONObject itemPerformance = (JSONObject) jsonObjectRow.get("c:ItemPerformance");
|
||||
// // for (Object perf : itemPerformance) {
|
||||
// JSONObject performance = (JSONObject) itemPerformance;
|
||||
// JSONObject periodObj = (JSONObject) performance.get("c:Period");
|
||||
// String period = periodObj.get("c:Begin").toString();
|
||||
// JSONObject instanceObj = (JSONObject) performance.get("c:Instance");
|
||||
// String type = instanceObj.get("c:MetricType").toString();
|
||||
// String count = instanceObj.get("c:Count").toString();
|
||||
// // System.out.println(rid + " : " + period + " : " + count);
|
||||
//
|
||||
// preparedStatement.setString(1, "SARC-OJS");
|
||||
// preparedStatement.setString(2, issn);
|
||||
// // preparedStatement.setString(2, url);
|
||||
// preparedStatement.setString(3, rid);
|
||||
// preparedStatement.setString(4, period);
|
||||
// preparedStatement.setString(5, type);
|
||||
// preparedStatement.setInt(6, Integer.parseInt(count));
|
||||
// preparedStatement.addBatch();
|
||||
// batch_size++;
|
||||
// if (batch_size == 10000) {
|
||||
// preparedStatement.executeBatch();
|
||||
// ConnectDB.getConnection().commit();
|
||||
// batch_size = 0;
|
||||
// }
|
||||
// // }
|
||||
//
|
||||
// // break;
|
||||
// }
|
||||
// break;
|
||||
}
|
||||
|
||||
|
|
|
@ -33,6 +33,7 @@ public class UsageStatsExporter {
|
|||
String irusUKBaseURL = "https://irus.jisc.ac.uk/api/sushilite/v1_7/";
|
||||
|
||||
String irusUKReportPath = "/user/spyros/logs/usage_stats_logs/irusUKReports";
|
||||
String sarcsReportPath = "/user/spyros/logs/usage_stats_logs/sarcReports";
|
||||
|
||||
// connect to DB
|
||||
ConnectDB.init(properties);
|
||||
|
@ -61,7 +62,7 @@ public class UsageStatsExporter {
|
|||
// log.info("irus done");
|
||||
//
|
||||
SarcStats sarcStats = new SarcStats();
|
||||
sarcStats.processSarc();
|
||||
sarcStats.processSarc(sarcsReportPath);
|
||||
// sarcStats.sarcStats();
|
||||
log.info("sarc done");
|
||||
|
||||
|
|
Loading…
Reference in New Issue