diff --git a/src/main/java/eu/openaire/urls_worker/controllers/GeneralController.java b/src/main/java/eu/openaire/urls_worker/controllers/GeneralController.java index 04a6bcc..4be5f27 100644 --- a/src/main/java/eu/openaire/urls_worker/controllers/GeneralController.java +++ b/src/main/java/eu/openaire/urls_worker/controllers/GeneralController.java @@ -43,7 +43,7 @@ public class GeneralController { logger.info(initMsg + " The worker will shutdown, after finishing current work."); return ResponseEntity.ok().build(); } else { - String errorMsg = initMsg + " But, it has an invalid \"shutdownCode\": " + shutdownCode; + String errorMsg = initMsg + " But, it contains an invalid \"shutdownCode\": " + shutdownCode; logger.error(errorMsg); return ResponseEntity.status(HttpStatus.FORBIDDEN).body(errorMsg); } @@ -59,7 +59,7 @@ public class GeneralController { logger.info(initMsg + " Any previous \"shutdownWorker\"-request is canceled. The \"maxAssignmentsBatchesToHandleBeforeShutdown\" will still be honored (if it's set)."); return ResponseEntity.ok().build(); } else { - String errorMsg = initMsg + " But, it has an invalid \"cancelCode\": " + cancelCode; + String errorMsg = initMsg + " But, it contains an invalid \"cancelCode\": " + cancelCode; logger.error(errorMsg); return ResponseEntity.status(HttpStatus.FORBIDDEN).body(errorMsg); } diff --git a/src/main/java/eu/openaire/urls_worker/plugins/PublicationsRetrieverPlugin.java b/src/main/java/eu/openaire/urls_worker/plugins/PublicationsRetrieverPlugin.java index 1e53d0a..ef236c2 100644 --- a/src/main/java/eu/openaire/urls_worker/plugins/PublicationsRetrieverPlugin.java +++ b/src/main/java/eu/openaire/urls_worker/plugins/PublicationsRetrieverPlugin.java @@ -19,6 +19,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.File; +import java.net.CookieStore; import java.nio.charset.StandardCharsets; import java.sql.Timestamp; import java.util.ArrayList; @@ -35,6 +36,8 @@ public class PublicationsRetrieverPlugin { public static String assignmentsBasePath; + private static CookieStore cookieStore = null; + public PublicationsRetrieverPlugin() { // Specify some configurations @@ -53,6 +56,8 @@ public class PublicationsRetrieverPlugin { ConnSupportUtils.shouldBlockMost5XXDomains = false; LoaderAndChecker.setCouldRetryRegex(); + cookieStore = HttpConnUtils.cookieManager.getCookieStore(); + int availableProcessors = Runtime.getRuntime().availableProcessors(); if ( availableProcessors <= 4 ) PublicationsRetriever.threadsMultiplier = 10; @@ -153,6 +158,12 @@ public class PublicationsRetrieverPlugin { UrlUtils.docOrDatasetUrlsWithIDs.clear(); // This HashTable is useful only for a single assignments-batch. // In the next batch, the previously stored files might have been already uploaded by the Controller and deleted by the worker. Also, they will be stored in a different directory anyway. + + ConnSupportUtils.domainsWithConnectionData.clear(); // This data is not useful for the next batch, since plenty of time will have passed before needing to check the "lastConnectedTime" for each domain, in order to apply the "politenessDelay". + + logger.debug("The number of cookies is: " + cookieStore.getCookies().size()); + boolean cookiesDeleted = cookieStore.removeAll(); + logger.debug(cookiesDeleted ? "The cookies where removed!" : "No cookies where removed!"); } diff --git a/src/main/java/eu/openaire/urls_worker/util/AssignmentsHandler.java b/src/main/java/eu/openaire/urls_worker/util/AssignmentsHandler.java index 05cbc29..37c69cc 100644 --- a/src/main/java/eu/openaire/urls_worker/util/AssignmentsHandler.java +++ b/src/main/java/eu/openaire/urls_worker/util/AssignmentsHandler.java @@ -2,7 +2,6 @@ package eu.openaire.urls_worker.util; import com.google.common.collect.HashMultimap; import com.google.common.collect.Multimap; -import eu.openaire.publications_retriever.util.http.HttpConnUtils; import eu.openaire.publications_retriever.util.url.GenericUtils; import eu.openaire.publications_retriever.util.url.UrlUtils; import eu.openaire.urls_worker.UrlsWorkerApplication; @@ -20,7 +19,6 @@ import org.springframework.http.ResponseEntity; import org.springframework.web.client.RestClientException; import org.springframework.web.client.RestTemplate; -import java.net.CookieStore; import java.time.Duration; import java.util.ArrayList; import java.util.List; @@ -46,19 +44,16 @@ public class AssignmentsHandler { public static long numHandledAssignmentsBatches = 0; // No need to be synchronized. - public static final long idUrlsToHandleBeforeClearingDomainAndPathTrackingData = 10_000_000; - public static final long idUrlsToHandleBeforeClearingDuplicateUrlsData = 1_000_000; + public static final long idUrlsToHandleBeforeClearingDomainAndPathTrackingData = 300_000; + public static final long idUrlsToHandleBeforeClearingDuplicateUrlsData = 200_000; - private static CookieStore cookieStore = null; - public AssignmentsHandler() { urlReports = new ArrayList<>(UrlsWorkerApplication.maxAssignmentsLimitPerBatch); int expectedAssignmentsPerDatasource = (UrlsWorkerApplication.maxAssignmentsLimitPerBatch / expectedDatasourcesPerRequest); assignmentsForPlugins = HashMultimap.create(expectedDatasourcesPerRequest, expectedAssignmentsPerDatasource); requestUrl = UrlsWorkerApplication.controllerBaseUrl + (askForTest ? "test/" : "") + "urls?workerId=" + UrlsWorkerApplication.workerId + "&workerAssignmentsLimit=" + UrlsWorkerApplication.maxAssignmentsLimitPerBatch; - cookieStore = HttpConnUtils.cookieManager.getCookieStore(); } @@ -185,9 +180,6 @@ public class AssignmentsHandler { } finally { urlReports.clear(); // Reset, without de-allocating. assignmentsForPlugins.clear(); - logger.debug("The number of cookies is: " + cookieStore.getCookies().size()); - boolean cookiesDeleted = cookieStore.removeAll(); - logger.debug(cookiesDeleted ? "The cookies where removed!" : "No cookies where removed!"); } }