- Lower the thresholds for how often to clear the data-structures.
- Clear the "ConnSupportUtils.domainsWithConnectionData" data-structure, after each batch.
- Move the code for handling the "CookieStore" inside the "PublicationsRetrieverPlugin", as it is more related to that.
This commit is contained in:
parent
5035094e44
commit
25070d7aba
|
@ -43,7 +43,7 @@ public class GeneralController {
|
||||||
logger.info(initMsg + " The worker will shutdown, after finishing current work.");
|
logger.info(initMsg + " The worker will shutdown, after finishing current work.");
|
||||||
return ResponseEntity.ok().build();
|
return ResponseEntity.ok().build();
|
||||||
} else {
|
} else {
|
||||||
String errorMsg = initMsg + " But, it has an invalid \"shutdownCode\": " + shutdownCode;
|
String errorMsg = initMsg + " But, it contains an invalid \"shutdownCode\": " + shutdownCode;
|
||||||
logger.error(errorMsg);
|
logger.error(errorMsg);
|
||||||
return ResponseEntity.status(HttpStatus.FORBIDDEN).body(errorMsg);
|
return ResponseEntity.status(HttpStatus.FORBIDDEN).body(errorMsg);
|
||||||
}
|
}
|
||||||
|
@ -59,7 +59,7 @@ public class GeneralController {
|
||||||
logger.info(initMsg + " Any previous \"shutdownWorker\"-request is canceled. The \"maxAssignmentsBatchesToHandleBeforeShutdown\" will still be honored (if it's set).");
|
logger.info(initMsg + " Any previous \"shutdownWorker\"-request is canceled. The \"maxAssignmentsBatchesToHandleBeforeShutdown\" will still be honored (if it's set).");
|
||||||
return ResponseEntity.ok().build();
|
return ResponseEntity.ok().build();
|
||||||
} else {
|
} else {
|
||||||
String errorMsg = initMsg + " But, it has an invalid \"cancelCode\": " + cancelCode;
|
String errorMsg = initMsg + " But, it contains an invalid \"cancelCode\": " + cancelCode;
|
||||||
logger.error(errorMsg);
|
logger.error(errorMsg);
|
||||||
return ResponseEntity.status(HttpStatus.FORBIDDEN).body(errorMsg);
|
return ResponseEntity.status(HttpStatus.FORBIDDEN).body(errorMsg);
|
||||||
}
|
}
|
||||||
|
|
|
@ -19,6 +19,7 @@ import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
|
import java.net.CookieStore;
|
||||||
import java.nio.charset.StandardCharsets;
|
import java.nio.charset.StandardCharsets;
|
||||||
import java.sql.Timestamp;
|
import java.sql.Timestamp;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
@ -35,6 +36,8 @@ public class PublicationsRetrieverPlugin {
|
||||||
|
|
||||||
public static String assignmentsBasePath;
|
public static String assignmentsBasePath;
|
||||||
|
|
||||||
|
private static CookieStore cookieStore = null;
|
||||||
|
|
||||||
|
|
||||||
public PublicationsRetrieverPlugin() {
|
public PublicationsRetrieverPlugin() {
|
||||||
// Specify some configurations
|
// Specify some configurations
|
||||||
|
@ -53,6 +56,8 @@ public class PublicationsRetrieverPlugin {
|
||||||
ConnSupportUtils.shouldBlockMost5XXDomains = false;
|
ConnSupportUtils.shouldBlockMost5XXDomains = false;
|
||||||
LoaderAndChecker.setCouldRetryRegex();
|
LoaderAndChecker.setCouldRetryRegex();
|
||||||
|
|
||||||
|
cookieStore = HttpConnUtils.cookieManager.getCookieStore();
|
||||||
|
|
||||||
int availableProcessors = Runtime.getRuntime().availableProcessors();
|
int availableProcessors = Runtime.getRuntime().availableProcessors();
|
||||||
if ( availableProcessors <= 4 )
|
if ( availableProcessors <= 4 )
|
||||||
PublicationsRetriever.threadsMultiplier = 10;
|
PublicationsRetriever.threadsMultiplier = 10;
|
||||||
|
@ -153,6 +158,12 @@ public class PublicationsRetrieverPlugin {
|
||||||
|
|
||||||
UrlUtils.docOrDatasetUrlsWithIDs.clear(); // This HashTable is useful only for a single assignments-batch.
|
UrlUtils.docOrDatasetUrlsWithIDs.clear(); // This HashTable is useful only for a single assignments-batch.
|
||||||
// In the next batch, the previously stored files might have been already uploaded by the Controller and deleted by the worker. Also, they will be stored in a different directory anyway.
|
// In the next batch, the previously stored files might have been already uploaded by the Controller and deleted by the worker. Also, they will be stored in a different directory anyway.
|
||||||
|
|
||||||
|
ConnSupportUtils.domainsWithConnectionData.clear(); // This data is not useful for the next batch, since plenty of time will have passed before needing to check the "lastConnectedTime" for each domain, in order to apply the "politenessDelay".
|
||||||
|
|
||||||
|
logger.debug("The number of cookies is: " + cookieStore.getCookies().size());
|
||||||
|
boolean cookiesDeleted = cookieStore.removeAll();
|
||||||
|
logger.debug(cookiesDeleted ? "The cookies where removed!" : "No cookies where removed!");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -2,7 +2,6 @@ package eu.openaire.urls_worker.util;
|
||||||
|
|
||||||
import com.google.common.collect.HashMultimap;
|
import com.google.common.collect.HashMultimap;
|
||||||
import com.google.common.collect.Multimap;
|
import com.google.common.collect.Multimap;
|
||||||
import eu.openaire.publications_retriever.util.http.HttpConnUtils;
|
|
||||||
import eu.openaire.publications_retriever.util.url.GenericUtils;
|
import eu.openaire.publications_retriever.util.url.GenericUtils;
|
||||||
import eu.openaire.publications_retriever.util.url.UrlUtils;
|
import eu.openaire.publications_retriever.util.url.UrlUtils;
|
||||||
import eu.openaire.urls_worker.UrlsWorkerApplication;
|
import eu.openaire.urls_worker.UrlsWorkerApplication;
|
||||||
|
@ -20,7 +19,6 @@ import org.springframework.http.ResponseEntity;
|
||||||
import org.springframework.web.client.RestClientException;
|
import org.springframework.web.client.RestClientException;
|
||||||
import org.springframework.web.client.RestTemplate;
|
import org.springframework.web.client.RestTemplate;
|
||||||
|
|
||||||
import java.net.CookieStore;
|
|
||||||
import java.time.Duration;
|
import java.time.Duration;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
@ -46,19 +44,16 @@ public class AssignmentsHandler {
|
||||||
|
|
||||||
public static long numHandledAssignmentsBatches = 0; // No need to be synchronized.
|
public static long numHandledAssignmentsBatches = 0; // No need to be synchronized.
|
||||||
|
|
||||||
public static final long idUrlsToHandleBeforeClearingDomainAndPathTrackingData = 10_000_000;
|
public static final long idUrlsToHandleBeforeClearingDomainAndPathTrackingData = 300_000;
|
||||||
public static final long idUrlsToHandleBeforeClearingDuplicateUrlsData = 1_000_000;
|
public static final long idUrlsToHandleBeforeClearingDuplicateUrlsData = 200_000;
|
||||||
|
|
||||||
|
|
||||||
private static CookieStore cookieStore = null;
|
|
||||||
|
|
||||||
public AssignmentsHandler()
|
public AssignmentsHandler()
|
||||||
{
|
{
|
||||||
urlReports = new ArrayList<>(UrlsWorkerApplication.maxAssignmentsLimitPerBatch);
|
urlReports = new ArrayList<>(UrlsWorkerApplication.maxAssignmentsLimitPerBatch);
|
||||||
int expectedAssignmentsPerDatasource = (UrlsWorkerApplication.maxAssignmentsLimitPerBatch / expectedDatasourcesPerRequest);
|
int expectedAssignmentsPerDatasource = (UrlsWorkerApplication.maxAssignmentsLimitPerBatch / expectedDatasourcesPerRequest);
|
||||||
assignmentsForPlugins = HashMultimap.create(expectedDatasourcesPerRequest, expectedAssignmentsPerDatasource);
|
assignmentsForPlugins = HashMultimap.create(expectedDatasourcesPerRequest, expectedAssignmentsPerDatasource);
|
||||||
requestUrl = UrlsWorkerApplication.controllerBaseUrl + (askForTest ? "test/" : "") + "urls?workerId=" + UrlsWorkerApplication.workerId + "&workerAssignmentsLimit=" + UrlsWorkerApplication.maxAssignmentsLimitPerBatch;
|
requestUrl = UrlsWorkerApplication.controllerBaseUrl + (askForTest ? "test/" : "") + "urls?workerId=" + UrlsWorkerApplication.workerId + "&workerAssignmentsLimit=" + UrlsWorkerApplication.maxAssignmentsLimitPerBatch;
|
||||||
cookieStore = HttpConnUtils.cookieManager.getCookieStore();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -185,9 +180,6 @@ public class AssignmentsHandler {
|
||||||
} finally {
|
} finally {
|
||||||
urlReports.clear(); // Reset, without de-allocating.
|
urlReports.clear(); // Reset, without de-allocating.
|
||||||
assignmentsForPlugins.clear();
|
assignmentsForPlugins.clear();
|
||||||
logger.debug("The number of cookies is: " + cookieStore.getCookies().size());
|
|
||||||
boolean cookiesDeleted = cookieStore.removeAll();
|
|
||||||
logger.debug(cookiesDeleted ? "The cookies where removed!" : "No cookies where removed!");
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue