- Lower the thresholds that control how often the data-structures are cleared (see the sketch below the commit metadata).

- Clear the "ConnSupportUtils.domainsWithConnectionData" data-structure after each batch.
- Move the code for handling the "CookieStore" into the "PublicationsRetrieverPlugin", as it is more closely related to that plugin.
Lampros Smyrnaios 2022-07-04 18:42:05 +03:00
parent 5035094e44
commit 25070d7aba
3 changed files with 15 additions and 12 deletions
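
A minimal sketch of how such clearing thresholds are typically consulted. The actual check is not part of this diff, so the class name, the counter handledIdUrls and the helper clearDomainAndPathTrackingData() are hypothetical; only the constant name and the lowered value (300_000, down from 10_000_000) come from the AssignmentsHandler changes shown below.

public class ClearingThresholdSketch {

    // Lowered threshold from this commit (previously 10_000_000).
    static final long idUrlsToHandleBeforeClearingDomainAndPathTrackingData = 300_000;

    static long handledIdUrls = 0;  // hypothetical counter of processed id-url records

    static void countHandledIdUrls(long idUrlsInLastBatch) {
        handledIdUrls += idUrlsInLastBatch;
        if ( handledIdUrls >= idUrlsToHandleBeforeClearingDomainAndPathTrackingData ) {
            clearDomainAndPathTrackingData();
            handledIdUrls = 0;  // start counting towards the next clearing
        }
    }

    static void clearDomainAndPathTrackingData() {
        // In the worker this would clear the domain- and path-tracking structures of the
        // publications_retriever library; omitted here, as their names are not shown in this diff.
    }
}

Lowering the threshold from 10_000_000 to 300_000 simply makes this branch fire much more often, so the memory held by the tracking structures is released sooner.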

GeneralController.java

@@ -43,7 +43,7 @@ public class GeneralController {
logger.info(initMsg + " The worker will shutdown, after finishing current work.");
return ResponseEntity.ok().build();
} else {
- String errorMsg = initMsg + " But, it has an invalid \"shutdownCode\": " + shutdownCode;
+ String errorMsg = initMsg + " But, it contains an invalid \"shutdownCode\": " + shutdownCode;
logger.error(errorMsg);
return ResponseEntity.status(HttpStatus.FORBIDDEN).body(errorMsg);
}
@@ -59,7 +59,7 @@ public class GeneralController {
logger.info(initMsg + " Any previous \"shutdownWorker\"-request is canceled. The \"maxAssignmentsBatchesToHandleBeforeShutdown\" will still be honored (if it's set).");
return ResponseEntity.ok().build();
} else {
- String errorMsg = initMsg + " But, it has an invalid \"cancelCode\": " + cancelCode;
+ String errorMsg = initMsg + " But, it contains an invalid \"cancelCode\": " + cancelCode;
logger.error(errorMsg);
return ResponseEntity.status(HttpStatus.FORBIDDEN).body(errorMsg);
}
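
For reference, a self-contained sketch of the validation pattern both endpoints above follow: accept a matching code with 200, otherwise log the problem and answer 403 FORBIDDEN. The class name, mapping path and expected code are hypothetical; only the ResponseEntity / HttpStatus handling mirrors the diff.

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.http.HttpStatus;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.RestController;

@RestController
public class ShutdownEndpointSketch {  // hypothetical class, not the real GeneralController

    private static final Logger logger = LoggerFactory.getLogger(ShutdownEndpointSketch.class);

    private static final String expectedShutdownCode = "change-me";  // hypothetical; the real code comes from configuration

    @GetMapping("/sketch/shutdownWorker")  // hypothetical path
    public ResponseEntity<?> shutdownWorker(@RequestParam String shutdownCode) {
        String initMsg = "Received a \"shutdownWorker\" request.";
        if ( expectedShutdownCode.equals(shutdownCode) ) {
            logger.info(initMsg + " The worker will shutdown, after finishing current work.");
            return ResponseEntity.ok().build();
        } else {
            String errorMsg = initMsg + " But, it contains an invalid \"shutdownCode\": " + shutdownCode;
            logger.error(errorMsg);
            return ResponseEntity.status(HttpStatus.FORBIDDEN).body(errorMsg);
        }
    }
}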

PublicationsRetrieverPlugin.java

@@ -19,6 +19,7 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
+ import java.net.CookieStore;
import java.nio.charset.StandardCharsets;
import java.sql.Timestamp;
import java.util.ArrayList;
@@ -35,6 +36,8 @@ public class PublicationsRetrieverPlugin {
public static String assignmentsBasePath;
+ private static CookieStore cookieStore = null;
public PublicationsRetrieverPlugin() {
// Specify some configurations
@@ -53,6 +56,8 @@ public class PublicationsRetrieverPlugin {
ConnSupportUtils.shouldBlockMost5XXDomains = false;
LoaderAndChecker.setCouldRetryRegex();
+ cookieStore = HttpConnUtils.cookieManager.getCookieStore();
int availableProcessors = Runtime.getRuntime().availableProcessors();
if ( availableProcessors <= 4 )
PublicationsRetriever.threadsMultiplier = 10;
@@ -153,6 +158,12 @@ public class PublicationsRetrieverPlugin {
UrlUtils.docOrDatasetUrlsWithIDs.clear(); // This HashTable is useful only for a single assignments-batch.
// In the next batch, the previously stored files might have been already uploaded by the Controller and deleted by the worker. Also, they will be stored in a different directory anyway.
+ ConnSupportUtils.domainsWithConnectionData.clear(); // This data is not useful for the next batch, since plenty of time will have passed before needing to check the "lastConnectedTime" for each domain, in order to apply the "politenessDelay".
+ logger.debug("The number of cookies is: " + cookieStore.getCookies().size());
+ boolean cookiesDeleted = cookieStore.removeAll();
+ logger.debug(cookiesDeleted ? "The cookies were removed!" : "No cookies were removed!");
}
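
For context, a self-contained sketch of the cookie-clearing step that this hunk moves into the plugin, using only the JDK's java.net.CookieManager / CookieStore API. In the worker the store comes from HttpConnUtils.cookieManager.getCookieStore() (shown above); here a local CookieManager stands in so the example runs on its own, and the example.org cookie is made up.

import java.net.CookieManager;
import java.net.CookieStore;
import java.net.HttpCookie;
import java.net.URI;

public class CookieCleanupSketch {

    public static void main(String[] args) {
        // In the worker: cookieStore = HttpConnUtils.cookieManager.getCookieStore();
        CookieManager cookieManager = new CookieManager();
        CookieStore cookieStore = cookieManager.getCookieStore();

        // Pretend a batch picked up a cookie while connecting to some (made-up) domain.
        cookieStore.add(URI.create("https://example.org"), new HttpCookie("session", "abc"));

        // Per-batch cleanup, mirroring the lines added above.
        System.out.println("The number of cookies is: " + cookieStore.getCookies().size());
        boolean cookiesDeleted = cookieStore.removeAll();  // true if at least one cookie was removed
        System.out.println(cookiesDeleted ? "The cookies were removed!" : "No cookies were removed!");
    }
}

Clearing the store after each assignments-batch keeps cookies collected for one batch from leaking into the next one.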

AssignmentsHandler.java

@@ -2,7 +2,6 @@ package eu.openaire.urls_worker.util;
import com.google.common.collect.HashMultimap;
import com.google.common.collect.Multimap;
- import eu.openaire.publications_retriever.util.http.HttpConnUtils;
import eu.openaire.publications_retriever.util.url.GenericUtils;
import eu.openaire.publications_retriever.util.url.UrlUtils;
import eu.openaire.urls_worker.UrlsWorkerApplication;
@@ -20,7 +19,6 @@ import org.springframework.http.ResponseEntity;
import org.springframework.web.client.RestClientException;
import org.springframework.web.client.RestTemplate;
- import java.net.CookieStore;
import java.time.Duration;
import java.util.ArrayList;
import java.util.List;
@@ -46,19 +44,16 @@ public class AssignmentsHandler {
public static long numHandledAssignmentsBatches = 0; // No need to be synchronized.
- public static final long idUrlsToHandleBeforeClearingDomainAndPathTrackingData = 10_000_000;
- public static final long idUrlsToHandleBeforeClearingDuplicateUrlsData = 1_000_000;
+ public static final long idUrlsToHandleBeforeClearingDomainAndPathTrackingData = 300_000;
+ public static final long idUrlsToHandleBeforeClearingDuplicateUrlsData = 200_000;
- private static CookieStore cookieStore = null;
public AssignmentsHandler()
{
urlReports = new ArrayList<>(UrlsWorkerApplication.maxAssignmentsLimitPerBatch);
int expectedAssignmentsPerDatasource = (UrlsWorkerApplication.maxAssignmentsLimitPerBatch / expectedDatasourcesPerRequest);
assignmentsForPlugins = HashMultimap.create(expectedDatasourcesPerRequest, expectedAssignmentsPerDatasource);
requestUrl = UrlsWorkerApplication.controllerBaseUrl + (askForTest ? "test/" : "") + "urls?workerId=" + UrlsWorkerApplication.workerId + "&workerAssignmentsLimit=" + UrlsWorkerApplication.maxAssignmentsLimitPerBatch;
- cookieStore = HttpConnUtils.cookieManager.getCookieStore();
}
@@ -185,9 +180,6 @@ public class AssignmentsHandler {
} finally {
urlReports.clear(); // Reset, without de-allocating.
assignmentsForPlugins.clear();
- logger.debug("The number of cookies is: " + cookieStore.getCookies().size());
- boolean cookiesDeleted = cookieStore.removeAll();
- logger.debug(cookiesDeleted ? "The cookies where removed!" : "No cookies where removed!");
}
}
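
Lastly, a short sketch of the "reset, without de-allocating" pattern kept in the finally-block above: clear() empties the reused collections but, for the ArrayList, keeps its backing array, so the next batch does not pay for re-growing it. The batch-handling method and its body are hypothetical; only the two clear() calls mirror the diff.

import com.google.common.collect.HashMultimap;
import com.google.common.collect.Multimap;

import java.util.ArrayList;
import java.util.List;

public class BatchResetSketch {

    // Allocated once, sized for a full batch (made-up capacity), and reused across batches.
    static final List<String> urlReports = new ArrayList<>(10_000);
    static final Multimap<String, String> assignmentsForPlugins = HashMultimap.create();

    static void handleAssignmentsBatch(List<String> assignments) {  // hypothetical method
        try {
            for ( String assignment : assignments ) {
                assignmentsForPlugins.put("somePlugin", assignment);  // made-up grouping
                urlReports.add("report for " + assignment);           // made-up report
            }
            // ... hand the grouped assignments to the plugins and post the url-reports ...
        } finally {
            urlReports.clear();  // Reset, without de-allocating.
            assignmentsForPlugins.clear();
        }
    }
}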