diff --git a/build.gradle b/build.gradle index c211e31..6f84353 100644 --- a/build.gradle +++ b/build.gradle @@ -42,6 +42,7 @@ dependencies { exclude group: 'ch.qos.logback', module: 'logback-classic' exclude group: 'org.slf4j', module: 'slf4j-api' exclude group: 'io.minio' // This is not used in the Worker, since it's the Controller which uploads the full-texts to S3. It also includes an older "commons-compress" version which causes problems. + exclude group: 'org.apache.commons', module: 'commons-compress' } implementation group: 'com.google.guava', name: 'guava', version: '33.2.0-jre' diff --git a/src/main/java/eu/openaire/urls_worker/util/FilesCompressor.java b/src/main/java/eu/openaire/urls_worker/util/FilesCompressor.java index 95e754d..22dc208 100644 --- a/src/main/java/eu/openaire/urls_worker/util/FilesCompressor.java +++ b/src/main/java/eu/openaire/urls_worker/util/FilesCompressor.java @@ -1,5 +1,6 @@ package eu.openaire.urls_worker.util; +import com.github.luben.zstd.Zstd; import eu.openaire.urls_worker.controllers.FullTextsController; import eu.openaire.urls_worker.models.TarFileResult; import org.apache.commons.compress.archivers.tar.TarArchiveEntry; @@ -24,6 +25,8 @@ public class FilesCompressor { public static final int bufferSize = (5 * 1_048_576); // 5 Mb + public static final int maxCompressionLevel = Zstd.maxCompressionLevel(); + public static File compressMultipleFilesIntoOne(long assignmentsCounter, int tarBatchCounter, List filesToCompress, String baseDirectory) { @@ -51,7 +54,7 @@ public class FilesCompressor { File zStandardFile = new File(zStandardFileFullPath); try ( BufferedInputStream in = new BufferedInputStream(Files.newInputStream(Paths.get(tarFilePath)), bufferSize); - ZstdCompressorOutputStream zOut = new ZstdCompressorOutputStream(new BufferedOutputStream(Files.newOutputStream(zStandardFile.toPath())), bufferSize) ) + ZstdCompressorOutputStream zOut = new ZstdCompressorOutputStream(new BufferedOutputStream(Files.newOutputStream(zStandardFile.toPath()), bufferSize), maxCompressionLevel) ) { int readByte; while ( (readByte = in.read()) != -1 )