forked from D-Net/dnet-hadoop
Remove the unneeded download code that ignores SSL certificates and uses username/password for authentication. Repository IDs are provided manually for now, until the Hive stats DB provides the correct piwik_id.
This commit is contained in:
parent
9cdea87c7a
commit
bf820a98b4
|
@ -75,78 +75,22 @@ public class PiwikDownloadLogs {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String getJson2(String url) throws Exception {
|
|
||||||
try {
|
|
||||||
|
|
||||||
// Trust all certificates
|
|
||||||
TrustManager[] trustAllCerts = new TrustManager[] {
|
|
||||||
new X509TrustManager() {
|
|
||||||
public java.security.cert.X509Certificate[] getAcceptedIssuers() {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void checkClientTrusted(X509Certificate[] certs, String authType) {
|
|
||||||
}
|
|
||||||
|
|
||||||
public void checkServerTrusted(X509Certificate[] certs, String authType) {
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
// Provide username & password until there is IP authentication
|
|
||||||
Authenticator.setDefault(new Authenticator() {
|
|
||||||
protected PasswordAuthentication getPasswordAuthentication() {
|
|
||||||
return new PasswordAuthentication("spyros", "XXXXXXXXXX".toCharArray());
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
SSLContext sc = SSLContext.getInstance("SSL");
|
|
||||||
sc.init(null, trustAllCerts, new java.security.SecureRandom());
|
|
||||||
HttpsURLConnection.setDefaultSSLSocketFactory(sc.getSocketFactory());
|
|
||||||
|
|
||||||
// Create all-trusting host name verifier
|
|
||||||
HostnameVerifier allHostsValid = new HostnameVerifier() {
|
|
||||||
public boolean verify(String hostname, SSLSession session) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
// Install the all-trusting host verifier
|
|
||||||
HttpsURLConnection.setDefaultHostnameVerifier(allHostsValid);
|
|
||||||
/* End of the fix */
|
|
||||||
|
|
||||||
System.out.println("===> Connecting to: " + url);
|
|
||||||
URL website = new URL(url);
|
|
||||||
System.out.println("Connection url -----> " + url);
|
|
||||||
URLConnection connection = website.openConnection();
|
|
||||||
|
|
||||||
// connection.setRequestProperty ("Authorization", "Basic "+encoded);
|
|
||||||
StringBuilder response;
|
|
||||||
try (BufferedReader in = new BufferedReader(new InputStreamReader(connection.getInputStream()))) {
|
|
||||||
response = new StringBuilder();
|
|
||||||
String inputLine;
|
|
||||||
while ((inputLine = in.readLine()) != null) {
|
|
||||||
response.append(inputLine);
|
|
||||||
response.append("\n");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return response.toString();
|
|
||||||
} catch (Exception e) {
|
|
||||||
System.out.println("Failed to get URL: " + e);
|
|
||||||
throw new Exception("Failed to get URL: " + e.toString(), e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public void GetOpenAIRELogs(String repoLogsPath, String portalLogPath, String portalMatomoID) throws Exception {
|
public void GetOpenAIRELogs(String repoLogsPath, String portalLogPath, String portalMatomoID) throws Exception {
|
||||||
|
|
||||||
Statement statement = ConnectDB.getConnection().createStatement();
|
Statement statement = ConnectDB.getConnection().createStatement();
|
||||||
|
|
||||||
ResultSet rs = statement
|
// ResultSet rs = statement
|
||||||
.executeQuery(
|
// .executeQuery(
|
||||||
"SELECT distinct piwik_id from " + ConnectDB.getStatsDBSchema()
|
// "SELECT distinct piwik_id from " + ConnectDB.getStatsDBSchema()
|
||||||
+ ".datasource where piwik_id is not null order by piwik_id");
|
// + ".datasource where piwik_id is not null order by piwik_id");
|
||||||
while (rs.next()) {
|
// while (rs.next()) {
|
||||||
// int siteId = rs.getInt(1);
|
// int siteId = rs.getInt(1);
|
||||||
int siteId = 13; // Until the stats database (datasource table) provides a correct id, we set it to 13
|
|
||||||
|
for (int manualSiteId : new int[] {
|
||||||
|
13, 23, 109
|
||||||
|
}) {
|
||||||
|
int siteId = manualSiteId; // Until the stats database (datasource table) provides a correct id, we set it
|
||||||
|
// to 13
|
||||||
SimpleDateFormat simpleDateFormat = new SimpleDateFormat("YYYY-MM");
|
SimpleDateFormat simpleDateFormat = new SimpleDateFormat("YYYY-MM");
|
||||||
|
|
||||||
Calendar start = Calendar.getInstance();
|
Calendar start = Calendar.getInstance();
|
||||||
|
@ -201,7 +145,7 @@ public class PiwikDownloadLogs {
|
||||||
apiUrl += "&filter_offset=" + (i * 1000);
|
apiUrl += "&filter_offset=" + (i * 1000);
|
||||||
}
|
}
|
||||||
|
|
||||||
content = getJson2(apiUrl);
|
content = getJson(apiUrl);
|
||||||
|
|
||||||
fin.write(content.getBytes());
|
fin.write(content.getBytes());
|
||||||
|
|
||||||
|
|
|
@ -131,8 +131,8 @@ public class PiwikStatsDB {
|
||||||
+ "process_portal_log_tmp.\"timestamp\" "
|
+ "process_portal_log_tmp.\"timestamp\" "
|
||||||
+ "FROM process_portal_log_tmp "
|
+ "FROM process_portal_log_tmp "
|
||||||
+ "WHERE process_portal_log_tmp.source = new.source AND process_portal_log_tmp.id_visit = new.id_visit AND process_portal_log_tmp.\"timestamp\" = new.\"timestamp\")) DO INSTEAD NOTHING;";
|
+ "WHERE process_portal_log_tmp.source = new.source AND process_portal_log_tmp.id_visit = new.id_visit AND process_portal_log_tmp.\"timestamp\" = new.\"timestamp\")) DO INSTEAD NOTHING;";
|
||||||
// stmt.executeUpdate(sqlCreateTmpTablePortalLog); --> We need to find a way to eliminate duplicates
|
stmt.executeUpdate(sqlCreateTmpTablePortalLog);
|
||||||
// stmt.executeUpdate(sqlcreateTmpRulePortalLog); --> We probably don't need indexes
|
// stmt.executeUpdate(sqlcreateTmpRulePortalLog); --> We need to find a way to eliminate duplicates
|
||||||
|
|
||||||
stmt.close();
|
stmt.close();
|
||||||
log.info("Usage Tmp Tables Created");
|
log.info("Usage Tmp Tables Created");
|
||||||
|
|
|
@ -38,12 +38,15 @@ public class UsageStatsExporter {
|
||||||
// Create DB tables - they are also needed to download the statistics too
|
// Create DB tables - they are also needed to download the statistics too
|
||||||
PiwikStatsDB piwikstatsdb = new PiwikStatsDB(repoLogPath, portalLogPath);
|
PiwikStatsDB piwikstatsdb = new PiwikStatsDB(repoLogPath, portalLogPath);
|
||||||
|
|
||||||
// Download the statistics
|
// Download the statistics - The following 2 lines are not needed after the download - Commenting them out for
|
||||||
|
// the moment
|
||||||
PiwikDownloadLogs piwd = new PiwikDownloadLogs(matomoBaseURL, matomoAuthToken);
|
PiwikDownloadLogs piwd = new PiwikDownloadLogs(matomoBaseURL, matomoAuthToken);
|
||||||
piwd.GetOpenAIRELogs(repoLogPath, portalLogPath, portalMatomoID);
|
piwd.GetOpenAIRELogs(repoLogPath, portalLogPath, portalMatomoID);
|
||||||
|
|
||||||
// Create DB tables, insert/update statistics
|
// Create DB tables, insert/update statistics
|
||||||
piwikstatsdb.setCounterRobotsURL(properties.getProperty("COUNTER_robots_Url"));
|
// String cRobotsUrl = properties.getProperty("COUNTER_robots_Url");
|
||||||
|
String cRobotsUrl = "https://raw.githubusercontent.com/atmire/COUNTER-Robots/master/COUNTER_Robots_list.json";
|
||||||
|
piwikstatsdb.setCounterRobotsURL(cRobotsUrl);
|
||||||
piwikstatsdb.processLogs();
|
piwikstatsdb.processLogs();
|
||||||
log.info("process logs done");
|
log.info("process logs done");
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue