diff --git a/src/main/java/eu/openaire/urls_worker/util/FilesCompressor.java b/src/main/java/eu/openaire/urls_worker/util/FilesCompressor.java index 1738903..9ca1130 100644 --- a/src/main/java/eu/openaire/urls_worker/util/FilesCompressor.java +++ b/src/main/java/eu/openaire/urls_worker/util/FilesCompressor.java @@ -6,8 +6,12 @@ import org.apache.commons.compress.compressors.zstandard.ZstdCompressorOutputStr import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.*; +import java.io.BufferedInputStream; +import java.io.BufferedOutputStream; +import java.io.File; +import java.io.IOException; import java.nio.file.Files; +import java.nio.file.Path; import java.nio.file.Paths; import java.util.List; @@ -15,6 +19,9 @@ public class FilesCompressor { private static final Logger logger = LoggerFactory.getLogger(FilesCompressor.class); + static final int tenMb = 10 * 1_048_576; + + public static File compressMultipleFilesIntoOne(long assignmentsCounter, int zipBatchCounter, List filesToCompress, String baseDirectory) { // For example: assignments_2_full-texts_4.zip | where < 4 > is referred to the 4th batch of files requested by the Controller. @@ -30,13 +37,12 @@ public class FilesCompressor { String zStandardFileFullPath = tarFilePath + ".zstd"; File zStandardFile = new File(zStandardFileFullPath); - try ( InputStream in = Files.newInputStream(Paths.get(tarFilePath)); - ZstdCompressorOutputStream zOut = new ZstdCompressorOutputStream(new BufferedOutputStream(Files.newOutputStream(zStandardFile.toPath()))) ) + try ( BufferedInputStream in = new BufferedInputStream(Files.newInputStream(Paths.get(tarFilePath)), tenMb); + ZstdCompressorOutputStream zOut = new ZstdCompressorOutputStream(new BufferedOutputStream(Files.newOutputStream(zStandardFile.toPath())), tenMb) ) { - final byte[] buffer = new byte[1048576]; // 1 Mb - int numBytes = 0; - while ( (numBytes = in.read(buffer)) != -1 ) { - zOut.write(buffer, 0, numBytes); + int readByte; + while ( (readByte = in.read()) != -1 ) { + zOut.write(readByte); } } catch (Exception e) { logger.error("Exception when compressing the tar-archive: " + tarFilePath, e); @@ -60,7 +66,7 @@ public class FilesCompressor { int numTarredFiles = 0; File tarFile = new File(tarFileFullPath); - try ( TarArchiveOutputStream taos = new TarArchiveOutputStream(Files.newOutputStream(tarFile.toPath())) ) + try ( TarArchiveOutputStream taos = new TarArchiveOutputStream(new BufferedOutputStream(Files.newOutputStream(tarFile.toPath()), tenMb)) ) { for ( String fileName : filesToTar ) { if ( addTarEntry(taos, fileName, baseDir) ) @@ -79,11 +85,11 @@ public class FilesCompressor { { boolean shouldCloseEntry = false; // Useful in order to know if we should close the entry (an Exception may appear, and so we should not try to close it). - String fullFileName = baseDir + fileName; - try ( FileInputStream fis = new FileInputStream(fullFileName) ) + Path fullFileNamePath = Paths.get(baseDir + fileName); + try ( BufferedInputStream fis = new BufferedInputStream(Files.newInputStream(fullFileNamePath), tenMb) ) { TarArchiveEntry entry = new TarArchiveEntry(fileName); - entry.setSize(Files.size(Paths.get(fullFileName))); // Yes, tar requires that we set the size beforehand.. + entry.setSize(Files.size(fullFileNamePath)); // Yes, tar requires that we set the size beforehand.. taos.putArchiveEntry(entry); shouldCloseEntry = true;