diff --git a/installAndRun.sh b/installAndRun.sh index 34615b0..9369275 100755 --- a/installAndRun.sh +++ b/installAndRun.sh @@ -47,7 +47,7 @@ if [[ justInstall -eq 0 ]]; then # Delete the directory with the source-code. cd ../ && rm -rf PublicationsRetriever - # Clean and (re)build and run the project. + # Clean, (re)build and run the project. cd ../ if [[ ! -d /opt/gradle/gradle-${gradleVersion} ]]; then diff --git a/src/main/java/eu/openaire/urls_worker/plugins/PublicationsRetrieverPlugin.java b/src/main/java/eu/openaire/urls_worker/plugins/PublicationsRetrieverPlugin.java index ae24fcc..d6d716f 100644 --- a/src/main/java/eu/openaire/urls_worker/plugins/PublicationsRetrieverPlugin.java +++ b/src/main/java/eu/openaire/urls_worker/plugins/PublicationsRetrieverPlugin.java @@ -33,28 +33,30 @@ public class PublicationsRetrieverPlugin { public static String assignmentsBasePath; - static { + + public PublicationsRetrieverPlugin() { // Specify some configurations LoaderAndChecker.retrieveDocuments = true; LoaderAndChecker.retrieveDatasets = false; + ConnSupportUtils.setKnownMimeTypes(); FileUtils.shouldDownloadDocFiles = true; FileUtils.docFileNameType = FileUtils.DocFileNameType.idName; PublicationsRetriever.targetUrlType = "docUrl"; FileUtils.jsonBatchSize = WorkerConstants.ASSIGNMENTS_LIMIT; + assignmentsBasePath = FileStorageService.assignmentsLocation.toString(); + if ( !assignmentsBasePath.endsWith(File.separator) ) + assignmentsBasePath += File.separator; + + ConnSupportUtils.shouldBlockMost5XXDomains = false; + LoaderAndChecker.setCouldRetryRegex(); + PublicationsRetriever.threadsMultiplier = 6; int workerThreadsCount = Runtime.getRuntime().availableProcessors() * PublicationsRetriever.threadsMultiplier; logger.info("Use " + workerThreadsCount + " worker-threads."); PublicationsRetriever.executor = Executors.newFixedThreadPool(workerThreadsCount); } - - public PublicationsRetrieverPlugin() { - assignmentsBasePath = FileStorageService.assignmentsLocation.toString(); - if ( !assignmentsBasePath.endsWith(File.separator) ) - assignmentsBasePath += File.separator; - } - private static final List> callableTasks = new ArrayList<>(FileUtils.jsonBatchSize); public static void processAssignments(Long assignmentRequestCounter, Collection assignments) throws RuntimeException, FileNotFoundException @@ -66,11 +68,10 @@ public class PublicationsRetrieverPlugin { if ( !curAssignmentsDirs.mkdirs() ) { // Create the directories. String workingDir = System.getProperty("user.dir") + File.separator; logger.error("Could not create the \"assignments_fullTexts directories\": \"" + FileUtils.storeDocFilesDir + "\". Using the \"workingDir\" instead (" + workingDir + ")."); - FileUtils.storeDocFilesDir = assignmentsBasePath = workingDir; + FileUtils.storeDocFilesDir = assignmentsBasePath = workingDir; } } - ConnSupportUtils.setKnownMimeTypes(); int tasksNumber = assignments.size(); int batchCount = 0; int tasksCount = 0;