forked from D-Net/dnet-hadoop
Schema separation on sarc stats that are downloaded
This commit is contained in:
parent
8bb00add0d
commit
17acbb7fc6
|
@ -11,6 +11,7 @@ import java.sql.ResultSet;
|
|||
import java.sql.Statement;
|
||||
import java.text.SimpleDateFormat;
|
||||
import java.util.Calendar;
|
||||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
|
@ -21,6 +22,7 @@ import org.apache.log4j.Logger;
|
|||
import org.json.simple.JSONArray;
|
||||
import org.json.simple.JSONObject;
|
||||
import org.json.simple.parser.JSONParser;
|
||||
import org.json.simple.parser.ParseException;
|
||||
|
||||
/**
|
||||
* Created by dpie
|
||||
|
@ -63,25 +65,59 @@ public class SarcStats {
|
|||
}
|
||||
}
|
||||
|
||||
public void processSarc(String sarcsReportPath) throws Exception {
|
||||
public void processSarc(String sarcsReportPathArray, String sarcsReportPathNonArray) throws Exception {
|
||||
// There was a problem to download the following file
|
||||
// processARReport(sarcsReportPath, "https://revistas.rcaap.pt/motricidade/sushiLite/v1_7/", "1646-107X");
|
||||
processARReport(sarcsReportPath, "https://revistas.rcaap.pt/antropologicas/sushiLite/v1_7/", "0873-819X");
|
||||
processARReport(sarcsReportPath, "https://revistas.rcaap.pt/interaccoes/sushiLite/v1_7/", "1646-2335");
|
||||
processARReport(sarcsReportPath, "https://revistas.rcaap.pt/cct/sushiLite/v1_7/", "2182-3030");
|
||||
processARReport(sarcsReportPath, "https://actapediatrica.spp.pt/sushiLite/v1_7/", "0873-9781");
|
||||
processARReport(sarcsReportPath, "https://revistas.rcaap.pt/sociologiapp/sushiLite/v1_7/", "0873-6529");
|
||||
processARReport(sarcsReportPath, "https://revistas.rcaap.pt/finisterra/sushiLite/v1_7/", "0430-5027");
|
||||
processARReport(sarcsReportPath, "https://revistas.rcaap.pt/sisyphus/sushiLite/v1_7/", "2182-8474");
|
||||
processARReport(sarcsReportPath, "https://revistas.rcaap.pt/anestesiologia/sushiLite/v1_7/", "0871-6099");
|
||||
processARReport(sarcsReportPath, "https://revistas.rcaap.pt/rpe/sushiLite/v1_7/", "0871-9187");
|
||||
processARReport(sarcsReportPath, "https://revistas.rcaap.pt/psilogos/sushiLite/v1_7/", "1646-091X");
|
||||
processARReport(sarcsReportPath, "https://revistas.rcaap.pt/juridica/sushiLite/v1_7/", "2183-5799");
|
||||
processARReport(sarcsReportPath, "https://revistas.rcaap.pt/ecr/sushiLite/v1_7/", "1647-2098");
|
||||
processARReport(sarcsReportPath, "https://revistas.rcaap.pt/nascercrescer/sushiLite/v1_7/", "0872-0754");
|
||||
processARReport(sarcsReportPath, "https://revistas.rcaap.pt/cea/sushiLite/v1_7/", "1645-3794");
|
||||
processARReport(sarcsReportPath, "https://revistas.rcaap.pt/proelium/sushiLite/v1_7/", "1645-8826");
|
||||
processARReport(sarcsReportPath, "https://revistas.rcaap.pt/millenium/sushiLite/v1_7/", "0873-3015");
|
||||
processARReport(
|
||||
sarcsReportPathArray, sarcsReportPathNonArray, "https://revistas.rcaap.pt/motricidade/sushiLite/v1_7/",
|
||||
"1646-107X");
|
||||
processARReport(
|
||||
sarcsReportPathArray, sarcsReportPathNonArray, "https://revistas.rcaap.pt/antropologicas/sushiLite/v1_7/",
|
||||
"0873-819X");
|
||||
processARReport(
|
||||
sarcsReportPathArray, sarcsReportPathNonArray, "https://revistas.rcaap.pt/interaccoes/sushiLite/v1_7/",
|
||||
"1646-2335");
|
||||
processARReport(
|
||||
sarcsReportPathArray, sarcsReportPathNonArray, "https://revistas.rcaap.pt/cct/sushiLite/v1_7/",
|
||||
"2182-3030");
|
||||
processARReport(
|
||||
sarcsReportPathArray, sarcsReportPathNonArray, "https://actapediatrica.spp.pt/sushiLite/v1_7/",
|
||||
"0873-9781");
|
||||
processARReport(
|
||||
sarcsReportPathArray, sarcsReportPathNonArray, "https://revistas.rcaap.pt/sociologiapp/sushiLite/v1_7/",
|
||||
"0873-6529");
|
||||
processARReport(
|
||||
sarcsReportPathArray, sarcsReportPathNonArray, "https://revistas.rcaap.pt/finisterra/sushiLite/v1_7/",
|
||||
"0430-5027");
|
||||
processARReport(
|
||||
sarcsReportPathArray, sarcsReportPathNonArray, "https://revistas.rcaap.pt/sisyphus/sushiLite/v1_7/",
|
||||
"2182-8474");
|
||||
processARReport(
|
||||
sarcsReportPathArray, sarcsReportPathNonArray, "https://revistas.rcaap.pt/anestesiologia/sushiLite/v1_7/",
|
||||
"0871-6099");
|
||||
processARReport(
|
||||
sarcsReportPathArray, sarcsReportPathNonArray, "https://revistas.rcaap.pt/rpe/sushiLite/v1_7/",
|
||||
"0871-9187");
|
||||
processARReport(
|
||||
sarcsReportPathArray, sarcsReportPathNonArray, "https://revistas.rcaap.pt/psilogos/sushiLite/v1_7/",
|
||||
"1646-091X");
|
||||
processARReport(
|
||||
sarcsReportPathArray, sarcsReportPathNonArray, "https://revistas.rcaap.pt/juridica/sushiLite/v1_7/",
|
||||
"2183-5799");
|
||||
processARReport(
|
||||
sarcsReportPathArray, sarcsReportPathNonArray, "https://revistas.rcaap.pt/ecr/sushiLite/v1_7/",
|
||||
"1647-2098");
|
||||
processARReport(
|
||||
sarcsReportPathArray, sarcsReportPathNonArray, "https://revistas.rcaap.pt/nascercrescer/sushiLite/v1_7/",
|
||||
"0872-0754");
|
||||
processARReport(
|
||||
sarcsReportPathArray, sarcsReportPathNonArray, "https://revistas.rcaap.pt/cea/sushiLite/v1_7/",
|
||||
"1645-3794");
|
||||
processARReport(
|
||||
sarcsReportPathArray, sarcsReportPathNonArray, "https://revistas.rcaap.pt/proelium/sushiLite/v1_7/",
|
||||
"1645-8826");
|
||||
processARReport(
|
||||
sarcsReportPathArray, sarcsReportPathNonArray, "https://revistas.rcaap.pt/millenium/sushiLite/v1_7/",
|
||||
"0873-3015");
|
||||
}
|
||||
|
||||
public void sarcStats() throws Exception {
|
||||
|
@ -100,7 +136,8 @@ public class SarcStats {
|
|||
ConnectDB.getConnection().close();
|
||||
}
|
||||
|
||||
public void processARReport(String sarcsReportPath, String url, String issn) throws Exception {
|
||||
public void processARReport(String sarcsReportPathArray, String sarcsReportPathNonArray,
|
||||
String url, String issn) throws Exception {
|
||||
log.info("Processing SARC! issn: " + issn + " with url: " + url);
|
||||
ConnectDB.getConnection().setAutoCommit(false);
|
||||
|
||||
|
@ -155,8 +192,19 @@ public class SarcStats {
|
|||
* PrintWriter wr = new PrintWriter(new FileWriter("logs/" + simpleDateFormat.format(start.getTime()) +
|
||||
* ".json")); wr.print(text); wr.close();
|
||||
*/
|
||||
|
||||
System.out.println("AAAAAAAAAAA text " + text);
|
||||
|
||||
JSONParser parser = new JSONParser();
|
||||
JSONObject jsonObject = (JSONObject) parser.parse(text);
|
||||
JSONObject jsonObject = null;
|
||||
try {
|
||||
jsonObject = (JSONObject) parser.parse(text);
|
||||
}
|
||||
// if there is a parsing error continue with the next url
|
||||
catch (ParseException pe) {
|
||||
continue;
|
||||
}
|
||||
|
||||
jsonObject = (JSONObject) jsonObject.get("sc:ReportResponse");
|
||||
jsonObject = (JSONObject) jsonObject.get("sc:Report");
|
||||
if (jsonObject == null) {
|
||||
|
@ -176,30 +224,37 @@ public class SarcStats {
|
|||
continue;
|
||||
}
|
||||
|
||||
// Creating the file in the filesystem
|
||||
FileSystem fs = FileSystem.get(new Configuration());
|
||||
String filePath = sarcsReportPath + "/" + "SarcsARReport_" +
|
||||
// Creating the file in the filesystem for the ItemIdentifier as array object
|
||||
FileSystem fsArray = FileSystem.get(new Configuration());
|
||||
String filePathArray = sarcsReportPathArray + "/" + "SarcsARReport_" +
|
||||
simpleDateFormat.format(start.getTime()) + ".json";
|
||||
System.out.println("Storing to file: " + filePath);
|
||||
FSDataOutputStream fin = fs.create(new Path(filePath), true);
|
||||
System.out.println("Storing to file: " + filePathArray);
|
||||
FSDataOutputStream finArray = fsArray.create(new Path(filePathArray), true);
|
||||
|
||||
// Creating the file in the filesystem for the ItemIdentifier as non-array object
|
||||
FileSystem fsNonArray = FileSystem.get(new Configuration());
|
||||
String filePathNonArray = sarcsReportPathNonArray + "/" + "SarcsARReport_" +
|
||||
simpleDateFormat.format(start.getTime()) + ".json";
|
||||
System.out.println("Storing to file: " + filePathNonArray);
|
||||
FSDataOutputStream finNonArray = fsNonArray.create(new Path(filePathNonArray), true);
|
||||
|
||||
String rid = "";
|
||||
for (Object aJsonArray : jsonArray) {
|
||||
|
||||
JSONObject jsonObjectRow = (JSONObject) aJsonArray;
|
||||
renameKeysRecursively(":", jsonObjectRow);
|
||||
System.out.println("oooo====> " + jsonObjectRow.toJSONString());
|
||||
|
||||
Set<String> jkeys = jsonObjectRow.keySet();
|
||||
for (String jkey : jkeys) {
|
||||
System.out.println("++++> " + jkey);
|
||||
String[] splitArray = jkey.split("c:");
|
||||
jkey = splitArray[splitArray.length - 1];
|
||||
System.out.println("New jkey: " + jkey);
|
||||
if (jsonObjectRow.get("ItemIdentifier") instanceof JSONObject) {
|
||||
finNonArray.write(jsonObjectRow.toJSONString().getBytes());
|
||||
finNonArray.writeChar('\n');
|
||||
} else {
|
||||
finArray.write(jsonObjectRow.toJSONString().getBytes());
|
||||
finArray.writeChar('\n');
|
||||
}
|
||||
}
|
||||
finArray.close();
|
||||
|
||||
fin.write(jsonObjectRow.toJSONString().getBytes());
|
||||
fin.writeChar('\n');
|
||||
}
|
||||
fin.close();
|
||||
//////////////////
|
||||
// JSONObject jsonObjectRow = (JSONObject) aJsonArray;
|
||||
// JSONArray itemIdentifier = new JSONArray();
|
||||
|
@ -254,12 +309,47 @@ public class SarcStats {
|
|||
// break;
|
||||
}
|
||||
|
||||
preparedStatement.executeBatch();
|
||||
ConnectDB.getConnection().commit();
|
||||
// preparedStatement.executeBatch();
|
||||
// ConnectDB.getConnection().commit();
|
||||
ConnectDB.getConnection().close();
|
||||
|
||||
}
|
||||
|
||||
private void renameKeysRecursively(String delimiter, JSONArray givenJsonObj) throws Exception {
|
||||
for (Object jjval : givenJsonObj) {
|
||||
if (jjval instanceof JSONArray)
|
||||
renameKeysRecursively(delimiter, (JSONArray) jjval);
|
||||
else if (jjval instanceof JSONObject)
|
||||
renameKeysRecursively(delimiter, (JSONObject) jjval);
|
||||
// All other types of vals
|
||||
else
|
||||
;
|
||||
}
|
||||
}
|
||||
|
||||
private void renameKeysRecursively(String delimiter, JSONObject givenJsonObj) throws Exception {
|
||||
Set<String> jkeys = new HashSet<String>(givenJsonObj.keySet());
|
||||
for (String jkey : jkeys) {
|
||||
System.out.println("++++> " + jkey);
|
||||
|
||||
String[] splitArray = jkey.split(delimiter);
|
||||
String newJkey = splitArray[splitArray.length - 1];
|
||||
System.out.println("New jkey: " + jkey);
|
||||
|
||||
Object jval = givenJsonObj.get(jkey);
|
||||
System.out.println("jval ===> " + jval.getClass().getName());
|
||||
givenJsonObj.remove(jkey);
|
||||
givenJsonObj.put(newJkey, jval);
|
||||
|
||||
if (jval instanceof JSONObject)
|
||||
renameKeysRecursively(delimiter, (JSONObject) jval);
|
||||
|
||||
if (jval instanceof JSONArray) {
|
||||
renameKeysRecursively(delimiter, (JSONArray) jval);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private String getJson(String url) throws Exception {
|
||||
// String cred=username+":"+password;
|
||||
// String encoded = new sun.misc.BASE64Encoder().encode (cred.getBytes());
|
||||
|
@ -279,10 +369,12 @@ public class SarcStats {
|
|||
return response.toString();
|
||||
} catch (Exception e) {
|
||||
|
||||
// Logging error and silently continuing
|
||||
log.error("Failed to get URL: " + e);
|
||||
System.out.println("Failed to get URL: " + e);
|
||||
// return null;
|
||||
throw new Exception("Failed to get URL: " + e.toString(), e);
|
||||
}
|
||||
// throw new Exception("Failed to get URL: " + e.toString(), e);
|
||||
}
|
||||
return "";
|
||||
}
|
||||
}
|
||||
|
|
|
@ -13,13 +13,14 @@ public class UsageStatsExporter {
|
|||
|
||||
static String matomoAuthToken = "703bd17d845acdaf795e01bb1e0895b9";
|
||||
static String matomoBaseURL = "analytics.openaire.eu";
|
||||
static String repoLogPath = "/user/spyros/logs/usage_stats_logs5/Repologs";
|
||||
static String portalLogPath = "/user/spyros/logs/usage_stats_logs5/Portallogs/";
|
||||
static String repoLogPath = "/user/spyros/logs/usage_stats_logs6/Repologs";
|
||||
static String portalLogPath = "/user/spyros/logs/usage_stats_logs6/Portallogs/";
|
||||
static String portalMatomoID = "109";
|
||||
static String irusUKBaseURL = "https://irus.jisc.ac.uk/api/sushilite/v1_7/";
|
||||
|
||||
static String irusUKReportPath = "/user/spyros/logs/usage_stats_logs5/irusUKReports";
|
||||
static String sarcsReportPath = "/user/spyros/logs/usage_stats_logs5/sarcReports";
|
||||
static String irusUKReportPath = "/user/spyros/logs/usage_stats_logs6/irusUKReports";
|
||||
static String sarcsReportPathArray = "/user/spyros/logs/usage_stats_logs6/sarcReports/Array";
|
||||
static String sarcsReportPathNonArray = "/user/spyros/logs/usage_stats_logs6/sarcReports/NonArray";
|
||||
|
||||
public UsageStatsExporter(Properties properties) {
|
||||
this.properties = properties;
|
||||
|
@ -59,7 +60,7 @@ public class UsageStatsExporter {
|
|||
// log.info("irus done");
|
||||
|
||||
SarcStats sarcStats = new SarcStats();
|
||||
sarcStats.processSarc(sarcsReportPath);
|
||||
sarcStats.processSarc(sarcsReportPathArray, sarcsReportPathNonArray);
|
||||
sarcStats.sarcStats();
|
||||
log.info("sarc done");
|
||||
|
||||
|
|
Loading…
Reference in New Issue