forked from D-Net/dnet-hadoop
Corrections to the date limits used when downloading OpenAIRE logs
This commit is contained in:
parent
7b7075cfdd
commit
2b330dd84c
|
@ -11,8 +11,10 @@ import java.sql.PreparedStatement;
|
||||||
import java.sql.ResultSet;
|
import java.sql.ResultSet;
|
||||||
import java.sql.Statement;
|
import java.sql.Statement;
|
||||||
import java.text.SimpleDateFormat;
|
import java.text.SimpleDateFormat;
|
||||||
|
import java.util.ArrayList;
|
||||||
import java.util.Calendar;
|
import java.util.Calendar;
|
||||||
import java.util.Date;
|
import java.util.Date;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
import javax.net.ssl.HostnameVerifier;
|
import javax.net.ssl.HostnameVerifier;
|
||||||
import javax.net.ssl.HttpsURLConnection;
|
import javax.net.ssl.HttpsURLConnection;
|
||||||
|
@ -60,7 +62,7 @@ public class PiwikDownloadLogs {
|
||||||
|
|
||||||
private String getJson(String url) throws Exception {
|
private String getJson(String url) throws Exception {
|
||||||
try {
|
try {
|
||||||
System.out.println("===> Connecting to: " + url);
|
logger.info("Connecting to download the JSON: " + url);
|
||||||
URL website = new URL(url);
|
URL website = new URL(url);
|
||||||
URLConnection connection = website.openConnection();
|
URLConnection connection = website.openConnection();
|
||||||
|
|
||||||
|
@ -87,29 +89,32 @@ public class PiwikDownloadLogs {
|
||||||
public void GetOpenAIRELogs(String repoLogsPath, String portalLogPath, String portalMatomoID) throws Exception {
|
public void GetOpenAIRELogs(String repoLogsPath, String portalLogPath, String portalMatomoID) throws Exception {
|
||||||
|
|
||||||
Statement statement = ConnectDB.getHiveConnection().createStatement();
|
Statement statement = ConnectDB.getHiveConnection().createStatement();
|
||||||
// SimpleDateFormat sdf = new SimpleDateFormat("MM/dd/yyyy");
|
|
||||||
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
|
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
|
||||||
|
|
||||||
ResultSet rs = statement
|
ResultSet rs = statement
|
||||||
.executeQuery(
|
.executeQuery(
|
||||||
"SELECT distinct piwik_id from " + ConnectDB.getStatsDBSchema()
|
"SELECT distinct piwik_id from " + ConnectDB.getStatsDBSchema()
|
||||||
+ ".datasource where piwik_id is not null and piwik_id <> 0 order by piwik_id");
|
+ ".datasource where piwik_id is not null and piwik_id <> 0 order by piwik_id");
|
||||||
while (rs.next()) {
|
|
||||||
int siteId = rs.getInt(1);
|
|
||||||
// SimpleDateFormat simpleDateFormat = new SimpleDateFormat("YYYY-MM");
|
|
||||||
|
|
||||||
// Calendar start = Calendar.getInstance();
|
// Collect all the piwik IDs into a list first, so they can be logged before being processed
|
||||||
// start.set(Calendar.YEAR, 2016);
|
List<Integer> piwikIdToVisit = new ArrayList<Integer>();
|
||||||
// start.set(Calendar.MONTH, Calendar.MARCH);
|
while (rs.next())
|
||||||
// start.setTime(simpleDateFormat.parse("2016-01"));
|
piwikIdToVisit.add(rs.getInt(1));
|
||||||
Calendar start = ExecuteWorkflow.startingLogPeriod;
|
logger.info("Downloading from repos with the followins piwikIds: " + piwikIdToVisit);
|
||||||
logger.info("GetOpenAIRELogs starting period: " + sdf.format(start.getTime()));
|
|
||||||
|
|
||||||
// Calendar end = Calendar.getInstance();
|
// Setting the starting period
|
||||||
// end.add(Calendar.DAY_OF_MONTH, -1);
|
Calendar start = ExecuteWorkflow.startingLogPeriod;
|
||||||
Calendar end = ExecuteWorkflow.endingLogPeriod;
|
logger.info("Starting period for log download: " + sdf.format(start.getTime()));
|
||||||
end.add(Calendar.DAY_OF_MONTH, -1);
|
|
||||||
logger.info("GetOpenAIRELogs ending period: " + sdf.format(end.getTime()));
|
// Setting the ending period (last day of the month)
|
||||||
|
Calendar end = ExecuteWorkflow.endingLogPeriod;
|
||||||
|
end.add(Calendar.MONTH, +1);
|
||||||
|
end.add(Calendar.DAY_OF_MONTH, -1);
|
||||||
|
logger.info("Starting period for log download: " + sdf.format(end.getTime()));
|
||||||
|
|
||||||
|
for (int siteId : piwikIdToVisit) {
|
||||||
|
|
||||||
|
logger.info("Now working on piwikId: " + siteId);
|
||||||
|
|
||||||
PreparedStatement st = ConnectDB.DB_HIVE_CONNECTION
|
PreparedStatement st = ConnectDB.DB_HIVE_CONNECTION
|
||||||
.prepareStatement(
|
.prepareStatement(
|
||||||
|
@ -126,7 +131,8 @@ public class PiwikDownloadLogs {
|
||||||
}
|
}
|
||||||
rs_date.close();
|
rs_date.close();
|
||||||
|
|
||||||
for (Date date = start.getTime(); start.before(end); start.add(Calendar.DATE, 1), date = start.getTime()) {
|
for (Calendar currDay = (Calendar) start.clone(); currDay.before(end); currDay.add(Calendar.DATE, 1)) {
|
||||||
|
Date date = currDay.getTime();
|
||||||
logger.info("Downloading logs for repoid " + siteId + " and for " + sdf.format(date));
|
logger.info("Downloading logs for repoid " + siteId + " and for " + sdf.format(date));
|
||||||
|
|
||||||
String period = "&period=day&date=" + sdf.format(date);
|
String period = "&period=day&date=" + sdf.format(date);
|
||||||
|
|
Loading…
Reference in New Issue