Improve performance when archiving and compressing the full-texts.

This commit is contained in:
Lampros Smyrnaios 2023-03-02 17:47:58 +02:00
parent ff4fd3d289
commit ba989484e4
1 changed file with 17 additions and 11 deletions

View File

@ -6,8 +6,12 @@ import org.apache.commons.compress.compressors.zstandard.ZstdCompressorOutputStr
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.*;
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.List;
@ -15,6 +19,9 @@ public class FilesCompressor {
private static final Logger logger = LoggerFactory.getLogger(FilesCompressor.class);
static final int tenMb = 10 * 1_048_576;
public static File compressMultipleFilesIntoOne(long assignmentsCounter, int zipBatchCounter, List<String> filesToCompress, String baseDirectory)
{
// For example: assignments_2_full-texts_4.zip | where < 4 > refers to the 4th batch of files requested by the Controller.
@ -30,13 +37,12 @@ public class FilesCompressor {
String zStandardFileFullPath = tarFilePath + ".zstd";
File zStandardFile = new File(zStandardFileFullPath);
try ( InputStream in = Files.newInputStream(Paths.get(tarFilePath));
ZstdCompressorOutputStream zOut = new ZstdCompressorOutputStream(new BufferedOutputStream(Files.newOutputStream(zStandardFile.toPath()))) )
try ( BufferedInputStream in = new BufferedInputStream(Files.newInputStream(Paths.get(tarFilePath)), tenMb);
ZstdCompressorOutputStream zOut = new ZstdCompressorOutputStream(new BufferedOutputStream(Files.newOutputStream(zStandardFile.toPath())), tenMb) )
{
final byte[] buffer = new byte[1048576]; // 1 Mb
int numBytes = 0;
while ( (numBytes = in.read(buffer)) != -1 ) {
zOut.write(buffer, 0, numBytes);
int readByte;
while ( (readByte = in.read()) != -1 ) {
zOut.write(readByte);
}
} catch (Exception e) {
logger.error("Exception when compressing the tar-archive: " + tarFilePath, e);
@ -60,7 +66,7 @@ public class FilesCompressor {
int numTarredFiles = 0;
File tarFile = new File(tarFileFullPath);
try ( TarArchiveOutputStream taos = new TarArchiveOutputStream(Files.newOutputStream(tarFile.toPath())) )
try ( TarArchiveOutputStream taos = new TarArchiveOutputStream(new BufferedOutputStream(Files.newOutputStream(tarFile.toPath()), tenMb)) )
{
for ( String fileName : filesToTar ) {
if ( addTarEntry(taos, fileName, baseDir) )
@ -79,11 +85,11 @@ public class FilesCompressor {
{
boolean shouldCloseEntry = false; // Useful in order to know if we should close the entry (an Exception may appear, and so we should not try to close it).
String fullFileName = baseDir + fileName;
try ( FileInputStream fis = new FileInputStream(fullFileName) )
Path fullFileNamePath = Paths.get(baseDir + fileName);
try ( BufferedInputStream fis = new BufferedInputStream(Files.newInputStream(fullFileNamePath), tenMb) )
{
TarArchiveEntry entry = new TarArchiveEntry(fileName);
entry.setSize(Files.size(Paths.get(fullFileName))); // Yes, tar requires that we set the size beforehand..
entry.setSize(Files.size(fullFileNamePath)); // Yes, tar requires that we set the size beforehand..
taos.putArchiveEntry(entry);
shouldCloseEntry = true;