From 4af74d458126052d805a246352d77198f1673929 Mon Sep 17 00:00:00 2001 From: LSmyrnaios Date: Tue, 28 May 2024 23:10:52 +0300 Subject: [PATCH] - Reduce the amount of "requiredFreeSpace" needed to be available in order to accept new assignments. - Increase the time to wait before rechecking the available free space, in order to get new assignments, to 30 minutes. - Update dependencies. - Code polishing. --- build.gradle | 6 +++--- .../urls_worker/components/ScheduledTasks.java | 6 +++--- .../openaire/urls_worker/util/FilesCompressor.java | 12 +++++------- 3 files changed, 11 insertions(+), 13 deletions(-) diff --git a/build.gradle b/build.gradle index 43131c8..c211e31 100644 --- a/build.gradle +++ b/build.gradle @@ -6,7 +6,7 @@ plugins { java { group = 'eu.openaire.urls_worker' - version = '2.1.9-SNAPSHOT' + version = '2.1.12-SNAPSHOT' sourceCompatibility = JavaVersion.VERSION_1_8 } @@ -47,9 +47,9 @@ dependencies { implementation group: 'com.google.guava', name: 'guava', version: '33.2.0-jre' // https://mvnrepository.com/artifact/com.google.code.gson/gson - implementation 'com.google.code.gson:gson:2.10.1' + implementation 'com.google.code.gson:gson:2.11.0' - implementation("org.apache.commons:commons-compress:1.26.1") { + implementation("org.apache.commons:commons-compress:1.26.2") { exclude group: 'com.github.luben', module: 'zstd-jni' } implementation 'com.github.luben:zstd-jni:1.5.6-3' // Even though this is part of the above dependency, the Apache commons rarely updates it, while the zstd team makes improvements very often. diff --git a/src/main/java/eu/openaire/urls_worker/components/ScheduledTasks.java b/src/main/java/eu/openaire/urls_worker/components/ScheduledTasks.java index b1ef10e..bcf5de5 100644 --- a/src/main/java/eu/openaire/urls_worker/components/ScheduledTasks.java +++ b/src/main/java/eu/openaire/urls_worker/components/ScheduledTasks.java @@ -50,7 +50,7 @@ public class ScheduledTasks { if ( maxAssignmentsLimitPerBatch <= 1_000 ) requiredFreeSpace = oneAndHalfGB; else - requiredFreeSpace = oneAndHalfGB * (maxAssignmentsLimitPerBatch / 1_000); + requiredFreeSpace = oneAndHalfGB * (maxAssignmentsLimitPerBatch / 1_500); logger.info("The \"requiredFreeSpace\" for the app to request new assignments, having \"maxAssignmentsLimitPerBatch\" equal to " + maxAssignmentsLimitPerBatch + " , is: " + (requiredFreeSpace / (1024 * 1024)) + " Mb"); } @@ -69,9 +69,9 @@ public class ScheduledTasks { // It's not safe to proceed with downloading more files and risk of "noSpaceLeft" error. // Wait for the Controller to take the full-texts and any remaining files to be deleted, so that more free-space becomes available. // We need to have some buffer zone for the ".tar" files which will be created from the already downloaded full-texts, when the Controller starts requesting them. - logger.warn("The free space is running out (less than " + (requiredFreeSpace / oneMb) + " Mb). The Worker will avoid getting new assignments for the next 15 minutes."); + logger.warn("The free space is running low (less than " + (requiredFreeSpace / oneMb) + " Mb). The Worker will avoid getting new assignments for the next 30 minutes."); try { - Thread.sleep(900_000); // Sleep for 15 mins to stall the scheduler from retrying right away, thus giving time to the disk-space to be freed. + Thread.sleep(1_800_000); // Sleep for 30 mins to stall the scheduler from retrying right away, thus giving time to the disk-space to be freed. } catch (InterruptedException ie) { logger.warn("Sleeping was interrupted!"); } diff --git a/src/main/java/eu/openaire/urls_worker/util/FilesCompressor.java b/src/main/java/eu/openaire/urls_worker/util/FilesCompressor.java index edd57b1..5eb9971 100644 --- a/src/main/java/eu/openaire/urls_worker/util/FilesCompressor.java +++ b/src/main/java/eu/openaire/urls_worker/util/FilesCompressor.java @@ -34,7 +34,7 @@ public class FilesCompressor { logger.error("Exception when creating the tar-file for assignments_" + assignmentsCounter, e); return null; } finally { - // Delete the files of this failed batch immediately. These files will not be requested again. The urls leading to these file will be reprocessed in the future. + // Delete the files of this batch immediately. These files will not be requested again. The urls leading to these file may be reprocessed in the future, in case the tar did not make it to the Controller.. for ( String fileName : filesToCompress ) FullTextsController.deleteFile(baseDirectory + fileName); } @@ -50,9 +50,8 @@ public class FilesCompressor { ZstdCompressorOutputStream zOut = new ZstdCompressorOutputStream(new BufferedOutputStream(Files.newOutputStream(zStandardFile.toPath())), bufferSize) ) { int readByte; - while ( (readByte = in.read()) != -1 ) { + while ( (readByte = in.read()) != -1 ) zOut.write(readByte); - } } catch (Exception e) { logger.error("Exception when compressing the tar-archive: " + tarFilePath, e); return null; @@ -80,15 +79,14 @@ public class FilesCompressor { try ( TarArchiveOutputStream taos = new TarArchiveOutputStream(new BufferedOutputStream(Files.newOutputStream(tarFile.toPath()), bufferSize)) ) { - for ( String fileName : filesToTar ) { + for ( String fileName : filesToTar ) if ( addTarEntry(taos, fileName, baseDir) ) numTarredFiles ++; - } } - if ( numTarredFiles == 0 ) { + if ( numTarredFiles == 0 ) throw new RuntimeException("None of the requested (" + filesToTar.size() + ") could be tarred, for assignments_" + assignmentsCounter + ", batch_" + tarBatchCounter); - } else if ( numTarredFiles != filesToTar.size() ) + else if ( numTarredFiles != filesToTar.size() ) logger.warn("The number of \"numTarredFiles\" (" + numTarredFiles + ") is different from the number of files requested to be tarred (" + filesToTar.size() + "), for assignments_" + assignmentsCounter + ", batch_" + tarBatchCounter); // Still, some files may have been tarred, so we move on. It's up to the Controller, to handle such case.