- Reduce the amount of "requiredFreeSpace" needed to be available in order to accept new assignments.

- Increase the time to wait before rechecking the available free space, in order to get new assignments, to 30 minutes.
- Update dependencies.
- Code polishing.
This commit is contained in:
Lampros Smyrnaios 2024-05-28 23:10:52 +03:00
parent c242f65518
commit 4af74d4581
3 changed files with 11 additions and 13 deletions

View File

@ -6,7 +6,7 @@ plugins {
java {
group = 'eu.openaire.urls_worker'
version = '2.1.9-SNAPSHOT'
version = '2.1.12-SNAPSHOT'
sourceCompatibility = JavaVersion.VERSION_1_8
}
@ -47,9 +47,9 @@ dependencies {
implementation group: 'com.google.guava', name: 'guava', version: '33.2.0-jre'
// https://mvnrepository.com/artifact/com.google.code.gson/gson
implementation 'com.google.code.gson:gson:2.10.1'
implementation 'com.google.code.gson:gson:2.11.0'
implementation("org.apache.commons:commons-compress:1.26.1") {
implementation("org.apache.commons:commons-compress:1.26.2") {
exclude group: 'com.github.luben', module: 'zstd-jni'
}
implementation 'com.github.luben:zstd-jni:1.5.6-3' // Even though this is part of the above dependency, the Apache commons rarely updates it, while the zstd team makes improvements very often.

View File

@ -50,7 +50,7 @@ public class ScheduledTasks {
if ( maxAssignmentsLimitPerBatch <= 1_000 )
requiredFreeSpace = oneAndHalfGB;
else
requiredFreeSpace = oneAndHalfGB * (maxAssignmentsLimitPerBatch / 1_000);
requiredFreeSpace = oneAndHalfGB * (maxAssignmentsLimitPerBatch / 1_500);
logger.info("The \"requiredFreeSpace\" for the app to request new assignments, having \"maxAssignmentsLimitPerBatch\" equal to " + maxAssignmentsLimitPerBatch + " , is: " + (requiredFreeSpace / (1024 * 1024)) + " Mb");
}
@ -69,9 +69,9 @@ public class ScheduledTasks {
// It's not safe to proceed with downloading more files and risk of "noSpaceLeft" error.
// Wait for the Controller to take the full-texts and any remaining files to be deleted, so that more free-space becomes available.
// We need to have some buffer zone for the ".tar" files which will be created from the already downloaded full-texts, when the Controller starts requesting them.
logger.warn("The free space is running out (less than " + (requiredFreeSpace / oneMb) + " Mb). The Worker will avoid getting new assignments for the next 15 minutes.");
logger.warn("The free space is running low (less than " + (requiredFreeSpace / oneMb) + " Mb). The Worker will avoid getting new assignments for the next 30 minutes.");
try {
Thread.sleep(900_000); // Sleep for 15 mins to stall the scheduler from retrying right away, thus giving time to the disk-space to be freed.
Thread.sleep(1_800_000); // Sleep for 30 mins to stall the scheduler from retrying right away, thus giving time to the disk-space to be freed.
} catch (InterruptedException ie) {
logger.warn("Sleeping was interrupted!");
}

View File

@ -34,7 +34,7 @@ public class FilesCompressor {
logger.error("Exception when creating the tar-file for assignments_" + assignmentsCounter, e);
return null;
} finally {
// Delete the files of this failed batch immediately. These files will not be requested again. The urls leading to these file will be reprocessed in the future.
// Delete the files of this batch immediately. These files will not be requested again. The urls leading to these file may be reprocessed in the future, in case the tar did not make it to the Controller..
for ( String fileName : filesToCompress )
FullTextsController.deleteFile(baseDirectory + fileName);
}
@ -50,9 +50,8 @@ public class FilesCompressor {
ZstdCompressorOutputStream zOut = new ZstdCompressorOutputStream(new BufferedOutputStream(Files.newOutputStream(zStandardFile.toPath())), bufferSize) )
{
int readByte;
while ( (readByte = in.read()) != -1 ) {
while ( (readByte = in.read()) != -1 )
zOut.write(readByte);
}
} catch (Exception e) {
logger.error("Exception when compressing the tar-archive: " + tarFilePath, e);
return null;
@ -80,15 +79,14 @@ public class FilesCompressor {
try ( TarArchiveOutputStream taos = new TarArchiveOutputStream(new BufferedOutputStream(Files.newOutputStream(tarFile.toPath()), bufferSize)) )
{
for ( String fileName : filesToTar ) {
for ( String fileName : filesToTar )
if ( addTarEntry(taos, fileName, baseDir) )
numTarredFiles ++;
}
}
if ( numTarredFiles == 0 ) {
if ( numTarredFiles == 0 )
throw new RuntimeException("None of the requested (" + filesToTar.size() + ") could be tarred, for assignments_" + assignmentsCounter + ", batch_" + tarBatchCounter);
} else if ( numTarredFiles != filesToTar.size() )
else if ( numTarredFiles != filesToTar.size() )
logger.warn("The number of \"numTarredFiles\" (" + numTarredFiles + ") is different from the number of files requested to be tarred (" + filesToTar.size() + "), for assignments_" + assignmentsCounter + ", batch_" + tarBatchCounter);
// Still, some files may have been tarred, so we move on. It's up to the Controller, to handle such case.