forked from D-Net/dnet-hadoop
More progress on downloading the JSON logs. All certificates are ignored, and authentication is done with a username and password.
This commit is contained in:
parent
66c7ddfc5e
commit
9cdea87c7a
|
@ -2,8 +2,11 @@
|
|||
package eu.dnetlib.oa.graph.usagestats.export;
|
||||
|
||||
import java.io.*;
|
||||
import java.net.Authenticator;
|
||||
import java.net.PasswordAuthentication;
|
||||
import java.net.URL;
|
||||
import java.net.URLConnection;
|
||||
import java.security.cert.X509Certificate;
|
||||
import java.sql.PreparedStatement;
|
||||
import java.sql.ResultSet;
|
||||
import java.sql.Statement;
|
||||
|
@ -11,6 +14,13 @@ import java.text.SimpleDateFormat;
|
|||
import java.util.Calendar;
|
||||
import java.util.Date;
|
||||
|
||||
import javax.net.ssl.HostnameVerifier;
|
||||
import javax.net.ssl.HttpsURLConnection;
|
||||
import javax.net.ssl.SSLContext;
|
||||
import javax.net.ssl.SSLSession;
|
||||
import javax.net.ssl.TrustManager;
|
||||
import javax.net.ssl.X509TrustManager;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
|
@ -43,7 +53,9 @@ public class PiwikDownloadLogs {
|
|||
|
||||
private String getJson(String url) throws Exception {
|
||||
try {
|
||||
System.out.println("===> Connecting to: " + url);
|
||||
URL website = new URL(url);
|
||||
System.out.println("Connection url -----> " + url);
|
||||
URLConnection connection = website.openConnection();
|
||||
|
||||
// connection.setRequestProperty ("Authorization", "Basic "+encoded);
|
||||
|
@ -63,6 +75,67 @@ public class PiwikDownloadLogs {
|
|||
}
|
||||
}
|
||||
|
||||
public static String getJson2(String url) throws Exception {
|
||||
try {
|
||||
|
||||
// Trust all certificates
|
||||
TrustManager[] trustAllCerts = new TrustManager[] {
|
||||
new X509TrustManager() {
|
||||
public java.security.cert.X509Certificate[] getAcceptedIssuers() {
|
||||
return null;
|
||||
}
|
||||
|
||||
public void checkClientTrusted(X509Certificate[] certs, String authType) {
|
||||
}
|
||||
|
||||
public void checkServerTrusted(X509Certificate[] certs, String authType) {
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Provide username & password until there is IP authentication
|
||||
Authenticator.setDefault(new Authenticator() {
|
||||
protected PasswordAuthentication getPasswordAuthentication() {
|
||||
return new PasswordAuthentication("spyros", "XXXXXXXXXX".toCharArray());
|
||||
}
|
||||
});
|
||||
|
||||
SSLContext sc = SSLContext.getInstance("SSL");
|
||||
sc.init(null, trustAllCerts, new java.security.SecureRandom());
|
||||
HttpsURLConnection.setDefaultSSLSocketFactory(sc.getSocketFactory());
|
||||
|
||||
// Create all-trusting host name verifier
|
||||
HostnameVerifier allHostsValid = new HostnameVerifier() {
|
||||
public boolean verify(String hostname, SSLSession session) {
|
||||
return true;
|
||||
}
|
||||
};
|
||||
// Install the all-trusting host verifier
|
||||
HttpsURLConnection.setDefaultHostnameVerifier(allHostsValid);
|
||||
/* End of the fix */
|
||||
|
||||
System.out.println("===> Connecting to: " + url);
|
||||
URL website = new URL(url);
|
||||
System.out.println("Connection url -----> " + url);
|
||||
URLConnection connection = website.openConnection();
|
||||
|
||||
// connection.setRequestProperty ("Authorization", "Basic "+encoded);
|
||||
StringBuilder response;
|
||||
try (BufferedReader in = new BufferedReader(new InputStreamReader(connection.getInputStream()))) {
|
||||
response = new StringBuilder();
|
||||
String inputLine;
|
||||
while ((inputLine = in.readLine()) != null) {
|
||||
response.append(inputLine);
|
||||
response.append("\n");
|
||||
}
|
||||
}
|
||||
return response.toString();
|
||||
} catch (Exception e) {
|
||||
System.out.println("Failed to get URL: " + e);
|
||||
throw new Exception("Failed to get URL: " + e.toString(), e);
|
||||
}
|
||||
}
|
||||
|
||||
public void GetOpenAIRELogs(String repoLogsPath, String portalLogPath, String portalMatomoID) throws Exception {
|
||||
|
||||
Statement statement = ConnectDB.getConnection().createStatement();
|
||||
|
@ -72,7 +145,8 @@ public class PiwikDownloadLogs {
|
|||
"SELECT distinct piwik_id from " + ConnectDB.getStatsDBSchema()
|
||||
+ ".datasource where piwik_id is not null order by piwik_id");
|
||||
while (rs.next()) {
|
||||
int siteId = rs.getInt(1);
|
||||
// int siteId = rs.getInt(1);
|
||||
int siteId = 13; // Until the stats database (datasource table) provides a correct id, we set it to 13
|
||||
SimpleDateFormat simpleDateFormat = new SimpleDateFormat("YYYY-MM");
|
||||
|
||||
Calendar start = Calendar.getInstance();
|
||||
|
@ -87,7 +161,7 @@ public class PiwikDownloadLogs {
|
|||
PreparedStatement st = ConnectDB.DB_CONNECTION
|
||||
.prepareStatement(
|
||||
"SELECT max(timestamp) FROM " + ConnectDB.getUsageStatsDBSchema()
|
||||
+ ".piwiklog WHERE source=? HAVING max(timestamp) is not null");
|
||||
+ ".piwiklog WHERE source=? GROUP BY timestamp HAVING max(timestamp) is not null");
|
||||
st.setInt(1, siteId);
|
||||
|
||||
ResultSet rs_date = st.executeQuery();
|
||||
|
@ -127,7 +201,7 @@ public class PiwikDownloadLogs {
|
|||
apiUrl += "&filter_offset=" + (i * 1000);
|
||||
}
|
||||
|
||||
content = getJson(apiUrl);
|
||||
content = getJson2(apiUrl);
|
||||
|
||||
fin.write(content.getBytes());
|
||||
|
||||
|
|
Loading…
Reference in New Issue