forked from D-Net/dnet-hadoop
Fix the date limits so that OpenAIRE logs are downloaded correctly
This commit is contained in:
parent
7b7075cfdd
commit
2b330dd84c
|
@ -11,8 +11,10 @@ import java.sql.PreparedStatement;
|
|||
import java.sql.ResultSet;
|
||||
import java.sql.Statement;
|
||||
import java.text.SimpleDateFormat;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Calendar;
|
||||
import java.util.Date;
|
||||
import java.util.List;
|
||||
|
||||
import javax.net.ssl.HostnameVerifier;
|
||||
import javax.net.ssl.HttpsURLConnection;
|
||||
|
@ -60,7 +62,7 @@ public class PiwikDownloadLogs {
|
|||
|
||||
private String getJson(String url) throws Exception {
|
||||
try {
|
||||
System.out.println("===> Connecting to: " + url);
|
||||
logger.info("Connecting to download the JSON: " + url);
|
||||
URL website = new URL(url);
|
||||
URLConnection connection = website.openConnection();
|
||||
|
||||
|
@ -87,29 +89,32 @@ public class PiwikDownloadLogs {
|
|||
public void GetOpenAIRELogs(String repoLogsPath, String portalLogPath, String portalMatomoID) throws Exception {
|
||||
|
||||
Statement statement = ConnectDB.getHiveConnection().createStatement();
|
||||
// SimpleDateFormat sdf = new SimpleDateFormat("MM/dd/yyyy");
|
||||
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
|
||||
|
||||
ResultSet rs = statement
|
||||
.executeQuery(
|
||||
"SELECT distinct piwik_id from " + ConnectDB.getStatsDBSchema()
|
||||
+ ".datasource where piwik_id is not null and piwik_id <> 0 order by piwik_id");
|
||||
while (rs.next()) {
|
||||
int siteId = rs.getInt(1);
|
||||
// SimpleDateFormat simpleDateFormat = new SimpleDateFormat("YYYY-MM");
|
||||
|
||||
// Calendar start = Calendar.getInstance();
|
||||
// start.set(Calendar.YEAR, 2016);
|
||||
// start.set(Calendar.MONTH, Calendar.MARCH);
|
||||
// start.setTime(simpleDateFormat.parse("2016-01"));
|
||||
Calendar start = ExecuteWorkflow.startingLogPeriod;
|
||||
logger.info("GetOpenAIRELogs starting period: " + sdf.format(start.getTime()));
|
||||
// Getting all the piwikids in a list for logging reasons
|
||||
List<Integer> piwikIdToVisit = new ArrayList<Integer>();
|
||||
while (rs.next())
|
||||
piwikIdToVisit.add(rs.getInt(1));
|
||||
logger.info("Downloading from repos with the followins piwikIds: " + piwikIdToVisit);
|
||||
|
||||
// Calendar end = Calendar.getInstance();
|
||||
// end.add(Calendar.DAY_OF_MONTH, -1);
|
||||
Calendar end = ExecuteWorkflow.endingLogPeriod;
|
||||
end.add(Calendar.DAY_OF_MONTH, -1);
|
||||
logger.info("GetOpenAIRELogs ending period: " + sdf.format(end.getTime()));
|
||||
// Setting the starting period
|
||||
Calendar start = ExecuteWorkflow.startingLogPeriod;
|
||||
logger.info("Starting period for log download: " + sdf.format(start.getTime()));
|
||||
|
||||
// Setting the ending period (last day of the month)
|
||||
Calendar end = ExecuteWorkflow.endingLogPeriod;
|
||||
end.add(Calendar.MONTH, +1);
|
||||
end.add(Calendar.DAY_OF_MONTH, -1);
|
||||
logger.info("Starting period for log download: " + sdf.format(end.getTime()));
|
||||
|
||||
for (int siteId : piwikIdToVisit) {
|
||||
|
||||
logger.info("Now working on piwikId: " + siteId);
|
||||
|
||||
PreparedStatement st = ConnectDB.DB_HIVE_CONNECTION
|
||||
.prepareStatement(
|
||||
|
@ -126,7 +131,8 @@ public class PiwikDownloadLogs {
|
|||
}
|
||||
rs_date.close();
|
||||
|
||||
for (Date date = start.getTime(); start.before(end); start.add(Calendar.DATE, 1), date = start.getTime()) {
|
||||
for (Calendar currDay = (Calendar) start.clone(); currDay.before(end); currDay.add(Calendar.DATE, 1)) {
|
||||
Date date = currDay.getTime();
|
||||
logger.info("Downloading logs for repoid " + siteId + " and for " + sdf.format(date));
|
||||
|
||||
String period = "&period=day&date=" + sdf.format(date);
|
||||
|
|
Loading…
Reference in New Issue