diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/PiwikDownloadLogs.java b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/PiwikDownloadLogs.java
index 7e94ea3e3..84d7f8c39 100644
--- a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/PiwikDownloadLogs.java
+++ b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/PiwikDownloadLogs.java
@@ -11,8 +11,10 @@ import java.sql.PreparedStatement;
 import java.sql.ResultSet;
 import java.sql.Statement;
 import java.text.SimpleDateFormat;
+import java.util.ArrayList;
 import java.util.Calendar;
 import java.util.Date;
+import java.util.List;
 
 import javax.net.ssl.HostnameVerifier;
 import javax.net.ssl.HttpsURLConnection;
@@ -60,7 +62,7 @@ public class PiwikDownloadLogs {
 
 	private String getJson(String url) throws Exception {
 		try {
-			System.out.println("===> Connecting to: " + url);
+			logger.info("Connecting to download the JSON: " + url);
 			URL website = new URL(url);
 			URLConnection connection = website.openConnection();
 
@@ -87,29 +89,32 @@ public class PiwikDownloadLogs {
 	public void GetOpenAIRELogs(String repoLogsPath, String portalLogPath, String portalMatomoID) throws Exception {
 
 		Statement statement = ConnectDB.getHiveConnection().createStatement();
-//		SimpleDateFormat sdf = new SimpleDateFormat("MM/dd/yyyy");
 		SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
 
 		ResultSet rs = statement
 			.executeQuery(
 				"SELECT distinct piwik_id from " + ConnectDB.getStatsDBSchema()
 					+ ".datasource where piwik_id is not null and piwik_id <> 0 order by piwik_id");
 
-		while (rs.next()) {
-			int siteId = rs.getInt(1);
-//			SimpleDateFormat simpleDateFormat = new SimpleDateFormat("YYYY-MM");
-//			Calendar start = Calendar.getInstance();
-//			start.set(Calendar.YEAR, 2016);
-//			start.set(Calendar.MONTH, Calendar.MARCH);
-			// start.setTime(simpleDateFormat.parse("2016-01"));
-			Calendar start = ExecuteWorkflow.startingLogPeriod;
-			logger.info("GetOpenAIRELogs starting period: " + sdf.format(start.getTime()));
+		// Getting all the piwikids in a list for logging reasons
+		List<Integer> piwikIdToVisit = new ArrayList<Integer>();
+		while (rs.next())
+			piwikIdToVisit.add(rs.getInt(1));
+		logger.info("Downloading from repos with the following piwikIds: " + piwikIdToVisit);
 
-//			Calendar end = Calendar.getInstance();
-//			end.add(Calendar.DAY_OF_MONTH, -1);
-			Calendar end = ExecuteWorkflow.endingLogPeriod;
-			end.add(Calendar.DAY_OF_MONTH, -1);
-			logger.info("GetOpenAIRELogs ending period: " + sdf.format(end.getTime()));
+		// Setting the starting period
+		Calendar start = ExecuteWorkflow.startingLogPeriod;
+		logger.info("Starting period for log download: " + sdf.format(start.getTime()));
+
+		// Setting the ending period (last day of the month)
+		Calendar end = ExecuteWorkflow.endingLogPeriod;
+		end.add(Calendar.MONTH, +1);
+		end.add(Calendar.DAY_OF_MONTH, -1);
+		logger.info("Ending period for log download: " + sdf.format(end.getTime()));
+
+		for (int siteId : piwikIdToVisit) {
+
+			logger.info("Now working on piwikId: " + siteId);
 
 			PreparedStatement st = ConnectDB.DB_HIVE_CONNECTION
 				.prepareStatement(
@@ -126,7 +131,8 @@ public class PiwikDownloadLogs {
 			}
 			rs_date.close();
 
-			for (Date date = start.getTime(); start.before(end); start.add(Calendar.DATE, 1), date = start.getTime()) {
+			for (Calendar currDay = (Calendar) start.clone(); currDay.before(end); currDay.add(Calendar.DATE, 1)) {
+				Date date = currDay.getTime();
logger.info("Downloading logs for repoid " + siteId + " and for " + sdf.format(date)); String period = "&period=day&date=" + sdf.format(date);