From 212f8f377d16f8dfe0605126360b567b543b561b Mon Sep 17 00:00:00 2001 From: LSmyrnaios Date: Tue, 30 Nov 2021 06:57:51 +0200 Subject: [PATCH] - Set the "ConnSupportUtils.shouldBlockMost5XXDomains" to "false" and call the "LoaderAndChecker.setCouldRetryRegex()" method. The above makes sure that for HTTP-5XX-errors, only the 511-domains get blocked and only the 511-urls get labeled with "noRetry". - Improve performance and reduce memory consumption, by calling the "ConnSupportUtils.setKnownMimeTypes()" method only once, in the constructor-method. - Code cleanup. --- installAndRun.sh | 2 +- .../plugins/PublicationsRetrieverPlugin.java | 21 ++++++++++--------- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/installAndRun.sh b/installAndRun.sh index 34615b0..9369275 100755 --- a/installAndRun.sh +++ b/installAndRun.sh @@ -47,7 +47,7 @@ if [[ justInstall -eq 0 ]]; then # Delete the directory with the source-code. cd ../ && rm -rf PublicationsRetriever - # Clean and (re)build and run the project. + # Clean, (re)build and run the project. cd ../ if [[ ! 
-d /opt/gradle/gradle-${gradleVersion} ]]; then diff --git a/src/main/java/eu/openaire/urls_worker/plugins/PublicationsRetrieverPlugin.java b/src/main/java/eu/openaire/urls_worker/plugins/PublicationsRetrieverPlugin.java index ae24fcc..d6d716f 100644 --- a/src/main/java/eu/openaire/urls_worker/plugins/PublicationsRetrieverPlugin.java +++ b/src/main/java/eu/openaire/urls_worker/plugins/PublicationsRetrieverPlugin.java @@ -33,28 +33,30 @@ public class PublicationsRetrieverPlugin { public static String assignmentsBasePath; - static { + + public PublicationsRetrieverPlugin() { // Specify some configurations LoaderAndChecker.retrieveDocuments = true; LoaderAndChecker.retrieveDatasets = false; + ConnSupportUtils.setKnownMimeTypes(); FileUtils.shouldDownloadDocFiles = true; FileUtils.docFileNameType = FileUtils.DocFileNameType.idName; PublicationsRetriever.targetUrlType = "docUrl"; FileUtils.jsonBatchSize = WorkerConstants.ASSIGNMENTS_LIMIT; + assignmentsBasePath = FileStorageService.assignmentsLocation.toString(); + if ( !assignmentsBasePath.endsWith(File.separator) ) + assignmentsBasePath += File.separator; + + ConnSupportUtils.shouldBlockMost5XXDomains = false; + LoaderAndChecker.setCouldRetryRegex(); + PublicationsRetriever.threadsMultiplier = 6; int workerThreadsCount = Runtime.getRuntime().availableProcessors() * PublicationsRetriever.threadsMultiplier; logger.info("Use " + workerThreadsCount + " worker-threads."); PublicationsRetriever.executor = Executors.newFixedThreadPool(workerThreadsCount); } - - public PublicationsRetrieverPlugin() { - assignmentsBasePath = FileStorageService.assignmentsLocation.toString(); - if ( !assignmentsBasePath.endsWith(File.separator) ) - assignmentsBasePath += File.separator; - } - private static final List> callableTasks = new ArrayList<>(FileUtils.jsonBatchSize); public static void processAssignments(Long assignmentRequestCounter, Collection assignments) throws RuntimeException, FileNotFoundException @@ -66,11 +68,10 @@ public 
class PublicationsRetrieverPlugin { if ( !curAssignmentsDirs.mkdirs() ) { // Create the directories. String workingDir = System.getProperty("user.dir") + File.separator; logger.error("Could not create the \"assignments_fullTexts directories\": \"" + FileUtils.storeDocFilesDir + "\". Using the \"workingDir\" instead (" + workingDir + ")."); - FileUtils.storeDocFilesDir = assignmentsBasePath = workingDir; + FileUtils.storeDocFilesDir = assignmentsBasePath = workingDir; } } - ConnSupportUtils.setKnownMimeTypes(); int tasksNumber = assignments.size(); int batchCount = 0; int tasksCount = 0;