More progress on downloading the JSONs. All certificates are ignored, and authentication is done with a username and password.

This commit is contained in:
Spyros Zoupanos 2020-05-16 13:16:16 +03:00
parent 66c7ddfc5e
commit 9cdea87c7a
1 changed files with 77 additions and 3 deletions

View File

@ -2,8 +2,11 @@
package eu.dnetlib.oa.graph.usagestats.export;
import java.io.*;
import java.net.Authenticator;
import java.net.PasswordAuthentication;
import java.net.URL;
import java.net.URLConnection;
import java.security.cert.X509Certificate;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.Statement;
@ -11,6 +14,13 @@ import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;
import javax.net.ssl.HostnameVerifier;
import javax.net.ssl.HttpsURLConnection;
import javax.net.ssl.SSLContext;
import javax.net.ssl.SSLSession;
import javax.net.ssl.TrustManager;
import javax.net.ssl.X509TrustManager;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
@ -43,7 +53,9 @@ public class PiwikDownloadLogs {
private String getJson(String url) throws Exception {
try {
System.out.println("===> Connecting to: " + url);
URL website = new URL(url);
System.out.println("Connection url -----> " + url);
URLConnection connection = website.openConnection();
// connection.setRequestProperty ("Authorization", "Basic "+encoded);
@ -63,6 +75,67 @@ public class PiwikDownloadLogs {
}
}
/**
 * Fetches the content behind {@code url} and returns it as one string, with every line read
 * terminated by {@code "\n"}.
 *
 * <p>Before connecting, this method installs three JVM-wide defaults: an {@link Authenticator}
 * that answers with a fixed username/password, an SSL socket factory that accepts every
 * certificate, and a hostname verifier that accepts every host. These defaults stay installed
 * after the method returns.
 *
 * <p>NOTE(review): the credentials are hard-coded in source and certificate/hostname validation
 * is switched off for all HTTPS connections; both should be replaced by configuration and a
 * proper trust store once IP-based authentication is available.
 *
 * @param url the address to download
 * @return the downloaded text, one {@code "\n"} appended after each line
 * @throws Exception if anything fails while configuring TLS, connecting or reading; the original
 *         exception is kept as the cause
 */
public static String getJson2(String url) throws Exception {
    try {
        // Trust all certificates
        TrustManager[] trustAllCerts = new TrustManager[] {
            new X509TrustManager() {
                @Override
                public java.security.cert.X509Certificate[] getAcceptedIssuers() {
                    // The X509TrustManager contract requires a non-null (possibly empty) array;
                    // returning null can trigger a NullPointerException in callers.
                    return new X509Certificate[0];
                }

                @Override
                public void checkClientTrusted(X509Certificate[] certs, String authType) {
                    // Intentionally empty: every client certificate is accepted.
                }

                @Override
                public void checkServerTrusted(X509Certificate[] certs, String authType) {
                    // Intentionally empty: every server certificate is accepted.
                }
            }
        };

        // Provide username & password until there is IP authentication
        Authenticator.setDefault(new Authenticator() {
            @Override
            protected PasswordAuthentication getPasswordAuthentication() {
                return new PasswordAuthentication("spyros", "XXXXXXXXXX".toCharArray());
            }
        });

        // Install the all-trusting socket factory as the default for every HTTPS connection
        SSLContext sc = SSLContext.getInstance("SSL");
        sc.init(null, trustAllCerts, new java.security.SecureRandom());
        HttpsURLConnection.setDefaultSSLSocketFactory(sc.getSocketFactory());

        // Create all-trusting host name verifier
        HostnameVerifier allHostsValid = new HostnameVerifier() {
            @Override
            public boolean verify(String hostname, SSLSession session) {
                return true;
            }
        };

        // Install the all-trusting host verifier
        HttpsURLConnection.setDefaultHostnameVerifier(allHostsValid);

        System.out.println("===> Connecting to: " + url);

        URL website = new URL(url);
        System.out.println("Connection url -----> " + url);
        URLConnection connection = website.openConnection();

        // connection.setRequestProperty ("Authorization", "Basic "+encoded);
        StringBuilder response;
        // NOTE(review): the reader decodes with the platform default charset — confirm whether
        // the endpoint always serves UTF-8 and align this with how callers re-encode the result.
        try (BufferedReader in = new BufferedReader(new InputStreamReader(connection.getInputStream()))) {
            response = new StringBuilder();
            String inputLine;
            while ((inputLine = in.readLine()) != null) {
                response.append(inputLine);
                response.append("\n");
            }
        }

        return response.toString();
    } catch (Exception e) {
        System.out.println("Failed to get URL: " + e);
        throw new Exception("Failed to get URL: " + e.toString(), e);
    }
}
public void GetOpenAIRELogs(String repoLogsPath, String portalLogPath, String portalMatomoID) throws Exception {
Statement statement = ConnectDB.getConnection().createStatement();
@ -72,7 +145,8 @@ public class PiwikDownloadLogs {
"SELECT distinct piwik_id from " + ConnectDB.getStatsDBSchema()
+ ".datasource where piwik_id is not null order by piwik_id");
while (rs.next()) {
int siteId = rs.getInt(1);
// int siteId = rs.getInt(1);
int siteId = 13; // Until the stats database (datasource table) provides a correct id, we set it to 13
SimpleDateFormat simpleDateFormat = new SimpleDateFormat("YYYY-MM");
Calendar start = Calendar.getInstance();
@ -87,7 +161,7 @@ public class PiwikDownloadLogs {
PreparedStatement st = ConnectDB.DB_CONNECTION
.prepareStatement(
"SELECT max(timestamp) FROM " + ConnectDB.getUsageStatsDBSchema()
+ ".piwiklog WHERE source=? HAVING max(timestamp) is not null");
+ ".piwiklog WHERE source=? GROUP BY timestamp HAVING max(timestamp) is not null");
st.setInt(1, siteId);
ResultSet rs_date = st.executeQuery();
@ -127,7 +201,7 @@ public class PiwikDownloadLogs {
apiUrl += "&filter_offset=" + (i * 1000);
}
content = getJson(apiUrl);
content = getJson2(apiUrl);
fin.write(content.getBytes());