forked from D-Net/dnet-hadoop
Remove the unneeded download code that ignores SSL certificates and uses username/password authentication. Repository IDs are provided manually for now, until the Hive stats DB provides the correct piwik_id.
This commit is contained in:
parent
9cdea87c7a
commit
bf820a98b4
|
@ -75,78 +75,22 @@ public class PiwikDownloadLogs {
|
|||
}
|
||||
}
|
||||
|
||||
public static String getJson2(String url) throws Exception {
|
||||
try {
|
||||
|
||||
// Trust all certificates
|
||||
TrustManager[] trustAllCerts = new TrustManager[] {
|
||||
new X509TrustManager() {
|
||||
public java.security.cert.X509Certificate[] getAcceptedIssuers() {
|
||||
return null;
|
||||
}
|
||||
|
||||
public void checkClientTrusted(X509Certificate[] certs, String authType) {
|
||||
}
|
||||
|
||||
public void checkServerTrusted(X509Certificate[] certs, String authType) {
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Provide username & password until there is IP authentication
|
||||
Authenticator.setDefault(new Authenticator() {
|
||||
protected PasswordAuthentication getPasswordAuthentication() {
|
||||
return new PasswordAuthentication("spyros", "XXXXXXXXXX".toCharArray());
|
||||
}
|
||||
});
|
||||
|
||||
SSLContext sc = SSLContext.getInstance("SSL");
|
||||
sc.init(null, trustAllCerts, new java.security.SecureRandom());
|
||||
HttpsURLConnection.setDefaultSSLSocketFactory(sc.getSocketFactory());
|
||||
|
||||
// Create all-trusting host name verifier
|
||||
HostnameVerifier allHostsValid = new HostnameVerifier() {
|
||||
public boolean verify(String hostname, SSLSession session) {
|
||||
return true;
|
||||
}
|
||||
};
|
||||
// Install the all-trusting host verifier
|
||||
HttpsURLConnection.setDefaultHostnameVerifier(allHostsValid);
|
||||
/* End of the fix */
|
||||
|
||||
System.out.println("===> Connecting to: " + url);
|
||||
URL website = new URL(url);
|
||||
System.out.println("Connection url -----> " + url);
|
||||
URLConnection connection = website.openConnection();
|
||||
|
||||
// connection.setRequestProperty ("Authorization", "Basic "+encoded);
|
||||
StringBuilder response;
|
||||
try (BufferedReader in = new BufferedReader(new InputStreamReader(connection.getInputStream()))) {
|
||||
response = new StringBuilder();
|
||||
String inputLine;
|
||||
while ((inputLine = in.readLine()) != null) {
|
||||
response.append(inputLine);
|
||||
response.append("\n");
|
||||
}
|
||||
}
|
||||
return response.toString();
|
||||
} catch (Exception e) {
|
||||
System.out.println("Failed to get URL: " + e);
|
||||
throw new Exception("Failed to get URL: " + e.toString(), e);
|
||||
}
|
||||
}
|
||||
|
||||
public void GetOpenAIRELogs(String repoLogsPath, String portalLogPath, String portalMatomoID) throws Exception {
|
||||
|
||||
Statement statement = ConnectDB.getConnection().createStatement();
|
||||
|
||||
ResultSet rs = statement
|
||||
.executeQuery(
|
||||
"SELECT distinct piwik_id from " + ConnectDB.getStatsDBSchema()
|
||||
+ ".datasource where piwik_id is not null order by piwik_id");
|
||||
while (rs.next()) {
|
||||
// ResultSet rs = statement
|
||||
// .executeQuery(
|
||||
// "SELECT distinct piwik_id from " + ConnectDB.getStatsDBSchema()
|
||||
// + ".datasource where piwik_id is not null order by piwik_id");
|
||||
// while (rs.next()) {
|
||||
// int siteId = rs.getInt(1);
|
||||
int siteId = 13; // Until the stats database (datasource table) provides a correct id, we set it to 13
|
||||
|
||||
for (int manualSiteId : new int[] {
|
||||
13, 23, 109
|
||||
}) {
|
||||
int siteId = manualSiteId; // Until the stats database (datasource table) provides a correct id, we set it
|
||||
// to 13
|
||||
SimpleDateFormat simpleDateFormat = new SimpleDateFormat("YYYY-MM");
|
||||
|
||||
Calendar start = Calendar.getInstance();
|
||||
|
@ -201,7 +145,7 @@ public class PiwikDownloadLogs {
|
|||
apiUrl += "&filter_offset=" + (i * 1000);
|
||||
}
|
||||
|
||||
content = getJson2(apiUrl);
|
||||
content = getJson(apiUrl);
|
||||
|
||||
fin.write(content.getBytes());
|
||||
|
||||
|
|
|
@ -131,8 +131,8 @@ public class PiwikStatsDB {
|
|||
+ "process_portal_log_tmp.\"timestamp\" "
|
||||
+ "FROM process_portal_log_tmp "
|
||||
+ "WHERE process_portal_log_tmp.source = new.source AND process_portal_log_tmp.id_visit = new.id_visit AND process_portal_log_tmp.\"timestamp\" = new.\"timestamp\")) DO INSTEAD NOTHING;";
|
||||
// stmt.executeUpdate(sqlCreateTmpTablePortalLog); --> We need to find a way to eliminate duplicates
|
||||
// stmt.executeUpdate(sqlcreateTmpRulePortalLog); --> We probably don't need indexes
|
||||
stmt.executeUpdate(sqlCreateTmpTablePortalLog);
|
||||
// stmt.executeUpdate(sqlcreateTmpRulePortalLog); --> We need to find a way to eliminate duplicates
|
||||
|
||||
stmt.close();
|
||||
log.info("Usage Tmp Tables Created");
|
||||
|
|
|
@ -38,12 +38,15 @@ public class UsageStatsExporter {
|
|||
// Create DB tables - they are also needed to download the statistics too
|
||||
PiwikStatsDB piwikstatsdb = new PiwikStatsDB(repoLogPath, portalLogPath);
|
||||
|
||||
// Download the statistics
|
||||
// Download the statistics - The following 2 lines are not needed after the download - Commenting them out for
|
||||
// the moment
|
||||
PiwikDownloadLogs piwd = new PiwikDownloadLogs(matomoBaseURL, matomoAuthToken);
|
||||
piwd.GetOpenAIRELogs(repoLogPath, portalLogPath, portalMatomoID);
|
||||
|
||||
// Create DB tables, insert/update statistics
|
||||
piwikstatsdb.setCounterRobotsURL(properties.getProperty("COUNTER_robots_Url"));
|
||||
// String cRobotsUrl = properties.getProperty("COUNTER_robots_Url");
|
||||
String cRobotsUrl = "https://raw.githubusercontent.com/atmire/COUNTER-Robots/master/COUNTER_Robots_list.json";
|
||||
piwikstatsdb.setCounterRobotsURL(cRobotsUrl);
|
||||
piwikstatsdb.processLogs();
|
||||
log.info("process logs done");
|
||||
|
||||
|
|
Loading…
Reference in New Issue