forked from lsmyrnaios/UrlsWorker
- Set "ConnSupportUtils.shouldBlockMost5XXDomains" to "false" and call the "LoaderAndChecker.setCouldRetryRegex()" method. Together, these changes make sure that, for HTTP-5XX errors, only the 511-domains get blocked and only the 511-urls get labeled with "noRetry".
- Improve performance and reduce memory consumption by calling the "ConnSupportUtils.setKnownMimeTypes()" method only once, in the constructor-method.
- Code cleanup.
This commit is contained in:
parent
6355b3e397
commit
212f8f377d
|
@ -47,7 +47,7 @@ if [[ justInstall -eq 0 ]]; then
|
|||
# Delete the directory with the source-code.
|
||||
cd ../ && rm -rf PublicationsRetriever
|
||||
|
||||
# Clean and (re)build and run the project.
|
||||
# Clean, (re)build and run the project.
|
||||
cd ../
|
||||
|
||||
if [[ ! -d /opt/gradle/gradle-${gradleVersion} ]]; then
|
||||
|
|
|
@ -33,28 +33,30 @@ public class PublicationsRetrieverPlugin {
|
|||
|
||||
public static String assignmentsBasePath;
|
||||
|
||||
static {
|
||||
|
||||
public PublicationsRetrieverPlugin() {
|
||||
// Specify some configurations
|
||||
LoaderAndChecker.retrieveDocuments = true;
|
||||
LoaderAndChecker.retrieveDatasets = false;
|
||||
ConnSupportUtils.setKnownMimeTypes();
|
||||
FileUtils.shouldDownloadDocFiles = true;
|
||||
FileUtils.docFileNameType = FileUtils.DocFileNameType.idName;
|
||||
PublicationsRetriever.targetUrlType = "docUrl";
|
||||
FileUtils.jsonBatchSize = WorkerConstants.ASSIGNMENTS_LIMIT;
|
||||
|
||||
assignmentsBasePath = FileStorageService.assignmentsLocation.toString();
|
||||
if ( !assignmentsBasePath.endsWith(File.separator) )
|
||||
assignmentsBasePath += File.separator;
|
||||
|
||||
ConnSupportUtils.shouldBlockMost5XXDomains = false;
|
||||
LoaderAndChecker.setCouldRetryRegex();
|
||||
|
||||
PublicationsRetriever.threadsMultiplier = 6;
|
||||
int workerThreadsCount = Runtime.getRuntime().availableProcessors() * PublicationsRetriever.threadsMultiplier;
|
||||
logger.info("Use " + workerThreadsCount + " worker-threads.");
|
||||
PublicationsRetriever.executor = Executors.newFixedThreadPool(workerThreadsCount);
|
||||
}
|
||||
|
||||
|
||||
public PublicationsRetrieverPlugin() {
|
||||
assignmentsBasePath = FileStorageService.assignmentsLocation.toString();
|
||||
if ( !assignmentsBasePath.endsWith(File.separator) )
|
||||
assignmentsBasePath += File.separator;
|
||||
}
|
||||
|
||||
private static final List<Callable<Boolean>> callableTasks = new ArrayList<>(FileUtils.jsonBatchSize);
|
||||
|
||||
public static void processAssignments(Long assignmentRequestCounter, Collection<Assignment> assignments) throws RuntimeException, FileNotFoundException
|
||||
|
@ -70,7 +72,6 @@ public class PublicationsRetrieverPlugin {
|
|||
}
|
||||
}
|
||||
|
||||
ConnSupportUtils.setKnownMimeTypes();
|
||||
int tasksNumber = assignments.size();
|
||||
int batchCount = 0;
|
||||
int tasksCount = 0;
|
||||
|
|
Loading…
Reference in New Issue