Modifying JSON saving procedure to make the files usable by HIVE JsonSerDe

This commit is contained in:
Spyros Zoupanos 2020-05-21 21:49:33 +03:00
parent bf820a98b4
commit b213da51c4
2 changed files with 19 additions and 5 deletions

View File

@@ -26,6 +26,9 @@ import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.Path;
import org.apache.log4j.Logger; import org.apache.log4j.Logger;
import org.json.simple.JSONArray;
import org.json.simple.JSONObject;
import org.json.simple.parser.JSONParser;
public class PiwikDownloadLogs { public class PiwikDownloadLogs {
@@ -65,9 +68,12 @@ public class PiwikDownloadLogs {
String inputLine; String inputLine;
while ((inputLine = in.readLine()) != null) { while ((inputLine = in.readLine()) != null) {
response.append(inputLine); response.append(inputLine);
response.append("\n"); // response.append("\n");
} }
} }
System.out.println("response ====> " + response.toString());
return response.toString(); return response.toString();
} catch (Exception e) { } catch (Exception e) {
log.error("Failed to get URL: " + e); log.error("Failed to get URL: " + e);
@@ -87,7 +93,8 @@ public class PiwikDownloadLogs {
// int siteId = rs.getInt(1); // int siteId = rs.getInt(1);
for (int manualSiteId : new int[] { for (int manualSiteId : new int[] {
13, 23, 109 13, 23
// , 109 -> This seems to be empty
}) { }) {
int siteId = manualSiteId; // Until the stats database (datasource table) provides a correct id, we set it int siteId = manualSiteId; // Until the stats database (datasource table) provides a correct id, we set it
// to 13 // to 13
@@ -138,7 +145,8 @@ public class PiwikDownloadLogs {
int i = 0; int i = 0;
while (!content.equals("[]\n")) { JSONParser parser = new JSONParser();
while (!content.equals("[]")) {
String apiUrl = baseApiUrl; String apiUrl = baseApiUrl;
if (i > 0) { if (i > 0) {
@@ -147,12 +155,16 @@ public class PiwikDownloadLogs {
content = getJson(apiUrl); content = getJson(apiUrl);
fin.write(content.getBytes()); JSONArray jsonArray = (JSONArray) parser.parse(content);
for (Object aJsonArray : jsonArray) {
JSONObject jsonObjectRaw = (JSONObject) aJsonArray;
fin.write(jsonObjectRaw.toJSONString().getBytes());
fin.writeChar('\n');
}
i++; i++;
} }
fin.close(); fin.close();
} }
} }

View File

@@ -43,6 +43,8 @@ public class UsageStatsExporter {
PiwikDownloadLogs piwd = new PiwikDownloadLogs(matomoBaseURL, matomoAuthToken); PiwikDownloadLogs piwd = new PiwikDownloadLogs(matomoBaseURL, matomoAuthToken);
piwd.GetOpenAIRELogs(repoLogPath, portalLogPath, portalMatomoID); piwd.GetOpenAIRELogs(repoLogPath, portalLogPath, portalMatomoID);
System.exit(0);
// Create DB tables, insert/update statistics // Create DB tables, insert/update statistics
// String cRobotsUrl = properties.getProperty("COUNTER_robots_Url"); // String cRobotsUrl = properties.getProperty("COUNTER_robots_Url");
String cRobotsUrl = "https://raw.githubusercontent.com/atmire/COUNTER-Robots/master/COUNTER_Robots_list.json"; String cRobotsUrl = "https://raw.githubusercontent.com/atmire/COUNTER-Robots/master/COUNTER_Robots_list.json";