forked from D-Net/dnet-hadoop
Modifying JSON saving procedure to make the files usable by HIVE JsonSerDe
This commit is contained in:
parent
bf820a98b4
commit
b213da51c4
|
@ -26,6 +26,9 @@ import org.apache.hadoop.fs.FSDataOutputStream;
|
||||||
import org.apache.hadoop.fs.FileSystem;
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
import org.apache.hadoop.fs.Path;
|
import org.apache.hadoop.fs.Path;
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
|
import org.json.simple.JSONArray;
|
||||||
|
import org.json.simple.JSONObject;
|
||||||
|
import org.json.simple.parser.JSONParser;
|
||||||
|
|
||||||
public class PiwikDownloadLogs {
|
public class PiwikDownloadLogs {
|
||||||
|
|
||||||
|
@ -65,9 +68,12 @@ public class PiwikDownloadLogs {
|
||||||
String inputLine;
|
String inputLine;
|
||||||
while ((inputLine = in.readLine()) != null) {
|
while ((inputLine = in.readLine()) != null) {
|
||||||
response.append(inputLine);
|
response.append(inputLine);
|
||||||
response.append("\n");
|
// response.append("\n");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
System.out.println("response ====> " + response.toString());
|
||||||
|
|
||||||
return response.toString();
|
return response.toString();
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
log.error("Failed to get URL: " + e);
|
log.error("Failed to get URL: " + e);
|
||||||
|
@ -87,7 +93,8 @@ public class PiwikDownloadLogs {
|
||||||
// int siteId = rs.getInt(1);
|
// int siteId = rs.getInt(1);
|
||||||
|
|
||||||
for (int manualSiteId : new int[] {
|
for (int manualSiteId : new int[] {
|
||||||
13, 23, 109
|
13, 23
|
||||||
|
// , 109 -> This seems to be empty
|
||||||
}) {
|
}) {
|
||||||
int siteId = manualSiteId; // Until the stats database (datasource table) provides a correct id, we set it
|
int siteId = manualSiteId; // Until the stats database (datasource table) provides a correct id, we set it
|
||||||
// to 13
|
// to 13
|
||||||
|
@ -138,7 +145,8 @@ public class PiwikDownloadLogs {
|
||||||
|
|
||||||
int i = 0;
|
int i = 0;
|
||||||
|
|
||||||
while (!content.equals("[]\n")) {
|
JSONParser parser = new JSONParser();
|
||||||
|
while (!content.equals("[]")) {
|
||||||
String apiUrl = baseApiUrl;
|
String apiUrl = baseApiUrl;
|
||||||
|
|
||||||
if (i > 0) {
|
if (i > 0) {
|
||||||
|
@ -147,12 +155,16 @@ public class PiwikDownloadLogs {
|
||||||
|
|
||||||
content = getJson(apiUrl);
|
content = getJson(apiUrl);
|
||||||
|
|
||||||
fin.write(content.getBytes());
|
JSONArray jsonArray = (JSONArray) parser.parse(content);
|
||||||
|
for (Object aJsonArray : jsonArray) {
|
||||||
|
JSONObject jsonObjectRaw = (JSONObject) aJsonArray;
|
||||||
|
fin.write(jsonObjectRaw.toJSONString().getBytes());
|
||||||
|
fin.writeChar('\n');
|
||||||
|
}
|
||||||
|
|
||||||
i++;
|
i++;
|
||||||
}
|
}
|
||||||
fin.close();
|
fin.close();
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -43,6 +43,8 @@ public class UsageStatsExporter {
|
||||||
PiwikDownloadLogs piwd = new PiwikDownloadLogs(matomoBaseURL, matomoAuthToken);
|
PiwikDownloadLogs piwd = new PiwikDownloadLogs(matomoBaseURL, matomoAuthToken);
|
||||||
piwd.GetOpenAIRELogs(repoLogPath, portalLogPath, portalMatomoID);
|
piwd.GetOpenAIRELogs(repoLogPath, portalLogPath, portalMatomoID);
|
||||||
|
|
||||||
|
System.exit(0);
|
||||||
|
|
||||||
// Create DB tables, insert/update statistics
|
// Create DB tables, insert/update statistics
|
||||||
// String cRobotsUrl = properties.getProperty("COUNTER_robots_Url");
|
// String cRobotsUrl = properties.getProperty("COUNTER_robots_Url");
|
||||||
String cRobotsUrl = "https://raw.githubusercontent.com/atmire/COUNTER-Robots/master/COUNTER_Robots_list.json";
|
String cRobotsUrl = "https://raw.githubusercontent.com/atmire/COUNTER-Robots/master/COUNTER_Robots_list.json";
|
||||||
|
|
Loading…
Reference in New Issue