diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/PiwikDownloadLogs.java b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/PiwikDownloadLogs.java index cd531f868..9e423abd3 100644 --- a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/PiwikDownloadLogs.java +++ b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/PiwikDownloadLogs.java @@ -64,27 +64,22 @@ public class PiwikDownloadLogs { private String getJson(String url) throws Exception { try { - logger.info("Connecting to download the JSON: " + url); + logger.debug("Connecting to download the JSON: " + url); URL website = new URL(url); URLConnection connection = website.openConnection(); - // connection.setRequestProperty ("Authorization", "Basic "+encoded); StringBuilder response; try (BufferedReader in = new BufferedReader(new InputStreamReader(connection.getInputStream()))) { response = new StringBuilder(); String inputLine; while ((inputLine = in.readLine()) != null) { response.append(inputLine); -// response.append("\n"); } } - -// System.out.println("response ====> " + response.toString()); - return response.toString(); } catch (Exception e) { - logger.error("Failed to get URL: " + e); - throw new Exception("Failed to get URL: " + e.toString(), e); + logger.error("Failed to get URL: " + url + " Exception: " + e); + throw new Exception("Failed to get URL: " + url + " Exception: " + e.toString(), e); } } @@ -105,10 +100,11 @@ public class PiwikDownloadLogs { } public void run() { + SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd"); System.out .println( Thread.currentThread().getName() + " (Start) Thread for " - + "parameters: currDay=" + currDay + ", siteId=" + siteId + + + "parameters: currDay=" + sdf.format(currDay.getTime()) + ", siteId=" + siteId + ", repoLogsPath=" + repoLogsPath + ", portalLogPath=" + portalLogPath + ", portalLogPath=" + portalLogPath + ", portalMatomoID=" + portalMatomoID); try { @@ -121,7 +117,7 @@ public class PiwikDownloadLogs { System.out .println( Thread.currentThread().getName() + " (End) Thread for " - + "parameters: currDay=" + currDay + ", siteId=" + siteId + + + "parameters: currDay=" + sdf.format(currDay.getTime()) + ", siteId=" + siteId + ", repoLogsPath=" + repoLogsPath + ", portalLogPath=" + portalLogPath + ", portalLogPath=" + portalLogPath + ", portalMatomoID=" + portalMatomoID); } @@ -151,9 +147,9 @@ public class PiwikDownloadLogs { JSONParser parser = new JSONParser(); StringBuffer totalContent = new StringBuffer(); FileSystem fs = FileSystem.get(new Configuration()); - FSDataOutputStream fin = fs - .create(new Path(outFolder + "/" + siteId + "_Piwiklog" + sdf.format((date)) + ".json"), true); + do { + int writtenBytes = 0; String apiUrl = baseApiUrl; if (i > 0) { @@ -164,23 +160,31 @@ public class PiwikDownloadLogs { if (content.length() == 0 || content.equals("[]")) break; + FSDataOutputStream fin = fs + .create( + new Path(outFolder + "/" + siteId + "_Piwiklog" + sdf.format((date)) + "_offset_" + i + + ".json"), + true); JSONArray jsonArray = (JSONArray) parser.parse(content); for (Object aJsonArray : jsonArray) { JSONObject jsonObjectRaw = (JSONObject) aJsonArray; - fin.write(jsonObjectRaw.toJSONString().getBytes()); + byte[] jsonObjectRawBytes = jsonObjectRaw.toJSONString().getBytes(); + fin.write(jsonObjectRawBytes); fin.writeChar('\n'); -// totalContent.append(jsonObjectRaw.toJSONString()); -// totalContent.append('\n'); + + writtenBytes += jsonObjectRawBytes.length + 1; } + + fin.close(); + System.out + .println( + Thread.currentThread().getName() + " (Finished writing) Wrote " + writtenBytes + + " bytes. Filename: " + siteId + "_Piwiklog" + sdf.format((date)) + "_offset_" + i + + ".json"); + i++; } while (true); -// FileSystem fs = FileSystem.get(new Configuration()); -// FSDataOutputStream fin = fs -// .create(new Path(outFolder + "/" + siteId + "_Piwiklog" + sdf.format((date)) + ".json"), true); -// -// fin.write(totalContent.toString().getBytes()); - fin.close(); fs.close(); } } @@ -221,7 +225,7 @@ public class PiwikDownloadLogs { logger.info("Ending period for log download: " + sdf.format(end.getTime())); // FileSystem fs = FileSystem.get(new Configuration()); - ExecutorService executor = Executors.newFixedThreadPool(20);// creating a pool of 5 threadsσ + ExecutorService executor = Executors.newFixedThreadPool(10);// creating a pool of 5 threadsσ for (int siteId : piwikIdToVisit) { logger.info("Now working on piwikId: " + siteId); @@ -242,7 +246,6 @@ public class PiwikDownloadLogs { rs_date.close(); for (Calendar currDay = (Calendar) start.clone(); currDay.before(end); currDay.add(Calendar.DATE, 1)) { -// Runnable worker = new WorkerThread(currDay, siteId, repoLogsPath, portalLogPath, portalMatomoID, fs); Runnable worker = new WorkerThread(currDay, siteId, repoLogsPath, portalLogPath, portalMatomoID); executor.execute(worker);// calling execute method of ExecutorService } diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/PiwikStatsDB.java b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/PiwikStatsDB.java index 63df64ac4..308881ee0 100644 --- a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/PiwikStatsDB.java +++ b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/PiwikStatsDB.java @@ -225,17 +225,6 @@ public class PiwikStatsDB { } } -// public void usageStats() throws Exception { -// try { -// viewsStats(); -// downloadsStats(); -// log.info("stat tables and views done"); -// } catch (Exception e) { -// log.error("Failed to create usage usagestats: " + e); -// throw new Exception("Failed to create usage usagestats: " + e.toString(), e); -// } -// } - public void processRepositoryLog() throws Exception { Statement stmt = ConnectDB.getHiveConnection().createStatement(); diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/SarcStats.java b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/SarcStats.java index d094dd270..8c203b237 100644 --- a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/SarcStats.java +++ b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/SarcStats.java @@ -94,8 +94,6 @@ public class SarcStats { stmt.executeUpdate("add jar /usr/share/cmf/common_jars/hive-hcatalog-core-1.1.0-cdh5.14.0.jar"); logger.info("Added JSON Serde jar"); -// " + issn.replace("-", "_" - logger.info("Dropping sarc_sushilogtmp_json_array table"); String drop_sarc_sushilogtmp_json_array = "DROP TABLE IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".sarc_sushilogtmp_json_array";