From 17acbb7fc67bb6ad19be0b32635c7f91aaa08777 Mon Sep 17 00:00:00 2001 From: Spyros Zoupanos Date: Wed, 16 Sep 2020 20:30:36 +0300 Subject: [PATCH] Schema separation on sarc stats that are downloaded --- .../oa/graph/usagestats/export/SarcStats.java | 168 ++++++++++++++---- .../usagestats/export/UsageStatsExporter.java | 11 +- 2 files changed, 136 insertions(+), 43 deletions(-) diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/SarcStats.java b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/SarcStats.java index 994bea0634..4efa7a35a1 100644 --- a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/SarcStats.java +++ b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/SarcStats.java @@ -11,6 +11,7 @@ import java.sql.ResultSet; import java.sql.Statement; import java.text.SimpleDateFormat; import java.util.Calendar; +import java.util.HashSet; import java.util.Set; import org.apache.hadoop.conf.Configuration; @@ -21,6 +22,7 @@ import org.apache.log4j.Logger; import org.json.simple.JSONArray; import org.json.simple.JSONObject; import org.json.simple.parser.JSONParser; +import org.json.simple.parser.ParseException; /** * Created by dpie @@ -63,25 +65,59 @@ public class SarcStats { } } - public void processSarc(String sarcsReportPath) throws Exception { + public void processSarc(String sarcsReportPathArray, String sarcsReportPathNonArray) throws Exception { // There was a problem to download the following file -// processARReport(sarcsReportPath, "https://revistas.rcaap.pt/motricidade/sushiLite/v1_7/", "1646-107X"); - processARReport(sarcsReportPath, "https://revistas.rcaap.pt/antropologicas/sushiLite/v1_7/", "0873-819X"); - processARReport(sarcsReportPath, "https://revistas.rcaap.pt/interaccoes/sushiLite/v1_7/", "1646-2335"); - processARReport(sarcsReportPath, "https://revistas.rcaap.pt/cct/sushiLite/v1_7/", "2182-3030"); - processARReport(sarcsReportPath, "https://actapediatrica.spp.pt/sushiLite/v1_7/", "0873-9781"); - processARReport(sarcsReportPath, "https://revistas.rcaap.pt/sociologiapp/sushiLite/v1_7/", "0873-6529"); - processARReport(sarcsReportPath, "https://revistas.rcaap.pt/finisterra/sushiLite/v1_7/", "0430-5027"); - processARReport(sarcsReportPath, "https://revistas.rcaap.pt/sisyphus/sushiLite/v1_7/", "2182-8474"); - processARReport(sarcsReportPath, "https://revistas.rcaap.pt/anestesiologia/sushiLite/v1_7/", "0871-6099"); - processARReport(sarcsReportPath, "https://revistas.rcaap.pt/rpe/sushiLite/v1_7/", "0871-9187"); - processARReport(sarcsReportPath, "https://revistas.rcaap.pt/psilogos/sushiLite/v1_7/", "1646-091X"); - processARReport(sarcsReportPath, "https://revistas.rcaap.pt/juridica/sushiLite/v1_7/", "2183-5799"); - processARReport(sarcsReportPath, "https://revistas.rcaap.pt/ecr/sushiLite/v1_7/", "1647-2098"); - processARReport(sarcsReportPath, "https://revistas.rcaap.pt/nascercrescer/sushiLite/v1_7/", "0872-0754"); - processARReport(sarcsReportPath, "https://revistas.rcaap.pt/cea/sushiLite/v1_7/", "1645-3794"); - processARReport(sarcsReportPath, "https://revistas.rcaap.pt/proelium/sushiLite/v1_7/", "1645-8826"); - processARReport(sarcsReportPath, "https://revistas.rcaap.pt/millenium/sushiLite/v1_7/", "0873-3015"); + processARReport( + sarcsReportPathArray, sarcsReportPathNonArray, "https://revistas.rcaap.pt/motricidade/sushiLite/v1_7/", + "1646-107X"); + processARReport( + sarcsReportPathArray, sarcsReportPathNonArray, "https://revistas.rcaap.pt/antropologicas/sushiLite/v1_7/", + "0873-819X"); + processARReport( + sarcsReportPathArray, sarcsReportPathNonArray, "https://revistas.rcaap.pt/interaccoes/sushiLite/v1_7/", + "1646-2335"); + processARReport( + sarcsReportPathArray, sarcsReportPathNonArray, "https://revistas.rcaap.pt/cct/sushiLite/v1_7/", + "2182-3030"); + processARReport( + sarcsReportPathArray, sarcsReportPathNonArray, "https://actapediatrica.spp.pt/sushiLite/v1_7/", + "0873-9781"); + processARReport( + sarcsReportPathArray, sarcsReportPathNonArray, "https://revistas.rcaap.pt/sociologiapp/sushiLite/v1_7/", + "0873-6529"); + processARReport( + sarcsReportPathArray, sarcsReportPathNonArray, "https://revistas.rcaap.pt/finisterra/sushiLite/v1_7/", + "0430-5027"); + processARReport( + sarcsReportPathArray, sarcsReportPathNonArray, "https://revistas.rcaap.pt/sisyphus/sushiLite/v1_7/", + "2182-8474"); + processARReport( + sarcsReportPathArray, sarcsReportPathNonArray, "https://revistas.rcaap.pt/anestesiologia/sushiLite/v1_7/", + "0871-6099"); + processARReport( + sarcsReportPathArray, sarcsReportPathNonArray, "https://revistas.rcaap.pt/rpe/sushiLite/v1_7/", + "0871-9187"); + processARReport( + sarcsReportPathArray, sarcsReportPathNonArray, "https://revistas.rcaap.pt/psilogos/sushiLite/v1_7/", + "1646-091X"); + processARReport( + sarcsReportPathArray, sarcsReportPathNonArray, "https://revistas.rcaap.pt/juridica/sushiLite/v1_7/", + "2183-5799"); + processARReport( + sarcsReportPathArray, sarcsReportPathNonArray, "https://revistas.rcaap.pt/ecr/sushiLite/v1_7/", + "1647-2098"); + processARReport( + sarcsReportPathArray, sarcsReportPathNonArray, "https://revistas.rcaap.pt/nascercrescer/sushiLite/v1_7/", + "0872-0754"); + processARReport( + sarcsReportPathArray, sarcsReportPathNonArray, "https://revistas.rcaap.pt/cea/sushiLite/v1_7/", + "1645-3794"); + processARReport( + sarcsReportPathArray, sarcsReportPathNonArray, "https://revistas.rcaap.pt/proelium/sushiLite/v1_7/", + "1645-8826"); + processARReport( + sarcsReportPathArray, sarcsReportPathNonArray, "https://revistas.rcaap.pt/millenium/sushiLite/v1_7/", + "0873-3015"); } public void sarcStats() throws Exception { @@ -100,7 +136,8 @@ public class SarcStats { ConnectDB.getConnection().close(); } - public void processARReport(String sarcsReportPath, String url, String issn) throws Exception { + public void processARReport(String sarcsReportPathArray, String sarcsReportPathNonArray, + String url, String issn) throws Exception { log.info("Processing SARC! issn: " + issn + " with url: " + url); ConnectDB.getConnection().setAutoCommit(false); @@ -155,8 +192,19 @@ public class SarcStats { * PrintWriter wr = new PrintWriter(new FileWriter("logs/" + simpleDateFormat.format(start.getTime()) + * ".json")); wr.print(text); wr.close(); */ + + System.out.println("AAAAAAAAAAA text " + text); + JSONParser parser = new JSONParser(); - JSONObject jsonObject = (JSONObject) parser.parse(text); + JSONObject jsonObject = null; + try { + jsonObject = (JSONObject) parser.parse(text); + } + // if there is a parsing error continue with the next url + catch (ParseException pe) { + continue; + } + jsonObject = (JSONObject) jsonObject.get("sc:ReportResponse"); jsonObject = (JSONObject) jsonObject.get("sc:Report"); if (jsonObject == null) { @@ -176,30 +224,37 @@ public class SarcStats { continue; } - // Creating the file in the filesystem - FileSystem fs = FileSystem.get(new Configuration()); - String filePath = sarcsReportPath + "/" + "SarcsARReport_" + + // Creating the file in the filesystem for the ItemIdentifier as array object + FileSystem fsArray = FileSystem.get(new Configuration()); + String filePathArray = sarcsReportPathArray + "/" + "SarcsARReport_" + simpleDateFormat.format(start.getTime()) + ".json"; - System.out.println("Storing to file: " + filePath); - FSDataOutputStream fin = fs.create(new Path(filePath), true); + System.out.println("Storing to file: " + filePathArray); + FSDataOutputStream finArray = fsArray.create(new Path(filePathArray), true); + + // Creating the file in the filesystem for the ItemIdentifier as array object + FileSystem fsNonArray = FileSystem.get(new Configuration()); + String filePathNonArray = sarcsReportPathNonArray + "/" + "SarcsARReport_" + + simpleDateFormat.format(start.getTime()) + ".json"; + System.out.println("Storing to file: " + filePathNonArray); + FSDataOutputStream finNonArray = fsNonArray.create(new Path(filePathNonArray), true); String rid = ""; for (Object aJsonArray : jsonArray) { JSONObject jsonObjectRow = (JSONObject) aJsonArray; + renameKeysRecursively(":", jsonObjectRow); + System.out.println("oooo====> " + jsonObjectRow.toJSONString()); - Set jkeys = jsonObjectRow.keySet(); - for (String jkey : jkeys) { - System.out.println("++++> " + jkey); - String[] splitArray = jkey.split("c:"); - jkey = splitArray[splitArray.length - 1]; - System.out.println("New jkey: " + jkey); + if (jsonObjectRow.get("ItemIdentifier") instanceof JSONObject) { + finNonArray.write(jsonObjectRow.toJSONString().getBytes()); + finNonArray.writeChar('\n'); + } else { + finArray.write(jsonObjectRow.toJSONString().getBytes()); + finArray.writeChar('\n'); } - - fin.write(jsonObjectRow.toJSONString().getBytes()); - fin.writeChar('\n'); } - fin.close(); + finArray.close(); + ////////////////// // JSONObject jsonObjectRow = (JSONObject) aJsonArray; // JSONArray itemIdentifier = new JSONArray(); @@ -254,12 +309,47 @@ public class SarcStats { // break; } - preparedStatement.executeBatch(); - ConnectDB.getConnection().commit(); +// preparedStatement.executeBatch(); +// ConnectDB.getConnection().commit(); ConnectDB.getConnection().close(); } + private void renameKeysRecursively(String delimiter, JSONArray givenJsonObj) throws Exception { + for (Object jjval : givenJsonObj) { + if (jjval instanceof JSONArray) + renameKeysRecursively(delimiter, (JSONArray) jjval); + else if (jjval instanceof JSONObject) + renameKeysRecursively(delimiter, (JSONObject) jjval); + // All other types of vals + else + ; + } + } + + private void renameKeysRecursively(String delimiter, JSONObject givenJsonObj) throws Exception { + Set jkeys = new HashSet(givenJsonObj.keySet()); + for (String jkey : jkeys) { + System.out.println("++++> " + jkey); + + String[] splitArray = jkey.split(delimiter); + String newJkey = splitArray[splitArray.length - 1]; + System.out.println("New jkey: " + jkey); + + Object jval = givenJsonObj.get(jkey); + System.out.println("jval ===> " + jval.getClass().getName()); + givenJsonObj.remove(jkey); + givenJsonObj.put(newJkey, jval); + + if (jval instanceof JSONObject) + renameKeysRecursively(delimiter, (JSONObject) jval); + + if (jval instanceof JSONArray) { + renameKeysRecursively(delimiter, (JSONArray) jval); + } + } + } + private String getJson(String url) throws Exception { // String cred=username+":"+password; // String encoded = new sun.misc.BASE64Encoder().encode (cred.getBytes()); @@ -279,10 +369,12 @@ public class SarcStats { return response.toString(); } catch (Exception e) { + // Logging error and silently continuing log.error("Failed to get URL: " + e); System.out.println("Failed to get URL: " + e); // return null; - throw new Exception("Failed to get URL: " + e.toString(), e); +// throw new Exception("Failed to get URL: " + e.toString(), e); } + return ""; } } diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/UsageStatsExporter.java b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/UsageStatsExporter.java index c9a06ac713..80d8446fc1 100644 --- a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/UsageStatsExporter.java +++ b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/UsageStatsExporter.java @@ -13,13 +13,14 @@ public class UsageStatsExporter { static String matomoAuthToken = "703bd17d845acdaf795e01bb1e0895b9"; static String matomoBaseURL = "analytics.openaire.eu"; - static String repoLogPath = "/user/spyros/logs/usage_stats_logs5/Repologs"; - static String portalLogPath = "/user/spyros/logs/usage_stats_logs5/Portallogs/"; + static String repoLogPath = "/user/spyros/logs/usage_stats_logs6/Repologs"; + static String portalLogPath = "/user/spyros/logs/usage_stats_logs6/Portallogs/"; static String portalMatomoID = "109"; static String irusUKBaseURL = "https://irus.jisc.ac.uk/api/sushilite/v1_7/"; - static String irusUKReportPath = "/user/spyros/logs/usage_stats_logs5/irusUKReports"; - static String sarcsReportPath = "/user/spyros/logs/usage_stats_logs5/sarcReports"; + static String irusUKReportPath = "/user/spyros/logs/usage_stats_logs6/irusUKReports"; + static String sarcsReportPathArray = "/user/spyros/logs/usage_stats_logs6/sarcReports/Array"; + static String sarcsReportPathNonArray = "/user/spyros/logs/usage_stats_logs6/sarcReports/NonArray"; public UsageStatsExporter(Properties properties) { this.properties = properties; @@ -59,7 +60,7 @@ public class UsageStatsExporter { // log.info("irus done"); SarcStats sarcStats = new SarcStats(); - sarcStats.processSarc(sarcsReportPath); + sarcStats.processSarc(sarcsReportPathArray, sarcsReportPathNonArray); sarcStats.sarcStats(); log.info("sarc done");