From b213da51c44c30a82b300d82720301c6dcb1fcbf Mon Sep 17 00:00:00 2001 From: Spyros Zoupanos Date: Thu, 21 May 2020 21:49:33 +0300 Subject: [PATCH] Modifying JSON saving procedure to make the files usable by HIVE JsonSerDe --- .../usagestats/export/PiwikDownloadLogs.java | 22 ++++++++++++++----- .../usagestats/export/UsageStatsExporter.java | 2 ++ 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/PiwikDownloadLogs.java b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/PiwikDownloadLogs.java index a6b7099ad2..4b2875a2f9 100644 --- a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/PiwikDownloadLogs.java +++ b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/PiwikDownloadLogs.java @@ -26,6 +26,9 @@ import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.log4j.Logger; +import org.json.simple.JSONArray; +import org.json.simple.JSONObject; +import org.json.simple.parser.JSONParser; public class PiwikDownloadLogs { @@ -65,9 +68,12 @@ public class PiwikDownloadLogs { String inputLine; while ((inputLine = in.readLine()) != null) { response.append(inputLine); - response.append("\n"); +// response.append("\n"); } } + + System.out.println("response ====> " + response.toString()); + return response.toString(); } catch (Exception e) { log.error("Failed to get URL: " + e); @@ -87,7 +93,8 @@ public class PiwikDownloadLogs { // int siteId = rs.getInt(1); for (int manualSiteId : new int[] { - 13, 23, 109 + 13, 23 +// , 109 -> This seems to be empty }) { int siteId = manualSiteId; // Until the stats database (datasource table) provides a correct id, we set it // to 13 @@ -138,7 +145,8 @@ public class PiwikDownloadLogs { int i = 0; - while (!content.equals("[]\n")) { + JSONParser parser = new JSONParser(); + while (!content.equals("[]")) { String apiUrl = baseApiUrl; if (i > 0) { @@ -147,12 +155,16 @@ public class PiwikDownloadLogs { content = getJson(apiUrl); - fin.write(content.getBytes()); + JSONArray jsonArray = (JSONArray) parser.parse(content); + for (Object aJsonArray : jsonArray) { + JSONObject jsonObjectRaw = (JSONObject) aJsonArray; + fin.write(jsonObjectRaw.toJSONString().getBytes()); + fin.writeChar('\n'); + } i++; } fin.close(); - } } diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/UsageStatsExporter.java b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/UsageStatsExporter.java index 8f5f6d4968..dab55eb5b2 100644 --- a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/UsageStatsExporter.java +++ b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/UsageStatsExporter.java @@ -43,6 +43,8 @@ public class UsageStatsExporter { PiwikDownloadLogs piwd = new PiwikDownloadLogs(matomoBaseURL, matomoAuthToken); piwd.GetOpenAIRELogs(repoLogPath, portalLogPath, portalMatomoID); + System.exit(0); + // Create DB tables, insert/update statistics // String cRobotsUrl = properties.getProperty("COUNTER_robots_Url"); String cRobotsUrl = "https://raw.githubusercontent.com/atmire/COUNTER-Robots/master/COUNTER_Robots_list.json";