diff --git a/src/main/java/eu/openaire/urls_worker/components/AssignmentsHandler.java b/src/main/java/eu/openaire/urls_worker/components/AssignmentsHandler.java index 4a74f44..7229f12 100644 --- a/src/main/java/eu/openaire/urls_worker/components/AssignmentsHandler.java +++ b/src/main/java/eu/openaire/urls_worker/components/AssignmentsHandler.java @@ -86,9 +86,6 @@ public class AssignmentsHandler { assignmentsForPlugins = HashMultimap.create(expectedDatasourcesPerRequest, expectedAssignmentsPerDatasource); requestUrl = this.controllerBaseUrl + (askForTest ? "test/" : "") + "urls?workerId=" + this.workerId + "&workerAssignmentsLimit=" + this.maxAssignmentsLimitPerBatch; - long durationInHours = (long) Math.ceil((double) this.maxAssignmentsLimitPerBatch / 1000); // For example, for 10_000 assignments we wait at most 10 hours. - logger.debug("Setting the max-connection duration for the \"post-worker-report\" to " + durationInHours + " hours."); - if ( !workerReportsDirPath.endsWith("/") ) workerReportsDirPath += "/"; diff --git a/src/main/java/eu/openaire/urls_worker/components/ScheduledTasks.java b/src/main/java/eu/openaire/urls_worker/components/ScheduledTasks.java index 213e241..598b311 100644 --- a/src/main/java/eu/openaire/urls_worker/components/ScheduledTasks.java +++ b/src/main/java/eu/openaire/urls_worker/components/ScheduledTasks.java @@ -69,7 +69,7 @@ public class ScheduledTasks { AssignmentsHandler.shouldNotRequestMore = true; return; } - logger.warn("The free space is running out (less than " + (requiredFreeSpace / (1024 * 1024)) + " Mb). Will avoid to get new assignments for the next 15 minutes."); + logger.warn("The free space is running out (less than " + (requiredFreeSpace / (1024 * 1024)) + " Mb). The Worker will avoid getting new assignments for the next 15 minutes."); try { Thread.sleep(900_000); // Sleep for 15 mins to stall the scheduler from retrying right away, thus giving time to the disk-space to be freed. } catch (InterruptedException ie) { diff --git a/src/main/java/eu/openaire/urls_worker/components/plugins/PublicationsRetrieverPlugin.java b/src/main/java/eu/openaire/urls_worker/components/plugins/PublicationsRetrieverPlugin.java index 5ca3886..2757b9e 100644 --- a/src/main/java/eu/openaire/urls_worker/components/plugins/PublicationsRetrieverPlugin.java +++ b/src/main/java/eu/openaire/urls_worker/components/plugins/PublicationsRetrieverPlugin.java @@ -1,6 +1,5 @@ package eu.openaire.urls_worker.components.plugins; -import edu.uci.ics.crawler4j.url.URLCanonicalizer; import eu.openaire.publications_retriever.PublicationsRetriever; import eu.openaire.publications_retriever.util.file.FileUtils; import eu.openaire.publications_retriever.util.http.ConnSupportUtils; @@ -118,8 +117,8 @@ public class PublicationsRetrieverPlugin { String urlToCheck = url; String sourceUrl = urlToCheck; // Hold it here for the logging-messages. - if ( !UrlUtils.URL_ACCEPTED_CHARS_TO_AVOID_CANONICALIZATION.matcher(sourceUrl).matches() && ((urlToCheck = URLCanonicalizer.getCanonicalURL(sourceUrl, null, StandardCharsets.UTF_8)) == null) ) { - logger.warn("Could not canonicalize url: " + sourceUrl); + if ( (urlToCheck = LoaderAndChecker.basicURLNormalizer.filter(sourceUrl)) == null ) { + logger.warn("Could not normalize url: " + sourceUrl); UrlUtils.logOutputData(id, sourceUrl, null, UrlUtils.unreachableDocOrDatasetUrlIndicator, "Discarded at loading time, due to canonicalization's problems.", null, true, "true", "false", "false", "false", "false", null, null); LoaderAndChecker.connProblematicUrls.incrementAndGet(); return false; diff --git a/src/main/java/eu/openaire/urls_worker/controllers/GeneralController.java b/src/main/java/eu/openaire/urls_worker/controllers/GeneralController.java index 708f978..fc7cc7c 100644 --- a/src/main/java/eu/openaire/urls_worker/controllers/GeneralController.java +++ b/src/main/java/eu/openaire/urls_worker/controllers/GeneralController.java @@ -125,8 +125,8 @@ public class GeneralController { logger.error(initMsg + "The \"HttpServletRequest\" is null!"); return ResponseEntity.internalServerError().build(); } - String remoteAddr = request.getHeader("X-FORWARDED-FOR"); - if ( remoteAddr == null || remoteAddr.isEmpty() ) + String remoteAddr = request.getHeader("X-FORWARDED-FOR"); // This retrieves the original IP address, if the request passes through a proxy server. + if ( remoteAddr == null ) remoteAddr = request.getRemoteAddr(); if ( ! (remoteAddr.equals("127.0.0.1") || remoteAddr.equals(UriBuilder.ip) || remoteAddr.equals(controllerIp)) ) {