Improve performance when archiving and compressing the full-texts.

This commit is contained in:
Lampros Smyrnaios 2023-03-02 17:47:58 +02:00
parent ff4fd3d289
commit ba989484e4
1 changed file with 17 additions and 11 deletions

View File

@ -6,8 +6,12 @@ import org.apache.commons.compress.compressors.zstandard.ZstdCompressorOutputStr
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import java.io.*; import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.IOException;
import java.nio.file.Files; import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths; import java.nio.file.Paths;
import java.util.List; import java.util.List;
@ -15,6 +19,9 @@ public class FilesCompressor {
private static final Logger logger = LoggerFactory.getLogger(FilesCompressor.class); private static final Logger logger = LoggerFactory.getLogger(FilesCompressor.class);
static final int tenMb = 10 * 1_048_576;
public static File compressMultipleFilesIntoOne(long assignmentsCounter, int zipBatchCounter, List<String> filesToCompress, String baseDirectory) public static File compressMultipleFilesIntoOne(long assignmentsCounter, int zipBatchCounter, List<String> filesToCompress, String baseDirectory)
{ {
// For example: assignments_2_full-texts_4.zip | where < 4 > is referred to the 4th batch of files requested by the Controller. // For example: assignments_2_full-texts_4.zip | where < 4 > is referred to the 4th batch of files requested by the Controller.
@ -30,13 +37,12 @@ public class FilesCompressor {
String zStandardFileFullPath = tarFilePath + ".zstd"; String zStandardFileFullPath = tarFilePath + ".zstd";
File zStandardFile = new File(zStandardFileFullPath); File zStandardFile = new File(zStandardFileFullPath);
try ( InputStream in = Files.newInputStream(Paths.get(tarFilePath)); try ( BufferedInputStream in = new BufferedInputStream(Files.newInputStream(Paths.get(tarFilePath)), tenMb);
ZstdCompressorOutputStream zOut = new ZstdCompressorOutputStream(new BufferedOutputStream(Files.newOutputStream(zStandardFile.toPath()))) ) ZstdCompressorOutputStream zOut = new ZstdCompressorOutputStream(new BufferedOutputStream(Files.newOutputStream(zStandardFile.toPath())), tenMb) )
{ {
final byte[] buffer = new byte[1048576]; // 1 Mb int readByte;
int numBytes = 0; while ( (readByte = in.read()) != -1 ) {
while ( (numBytes = in.read(buffer)) != -1 ) { zOut.write(readByte);
zOut.write(buffer, 0, numBytes);
} }
} catch (Exception e) { } catch (Exception e) {
logger.error("Exception when compressing the tar-archive: " + tarFilePath, e); logger.error("Exception when compressing the tar-archive: " + tarFilePath, e);
@ -60,7 +66,7 @@ public class FilesCompressor {
int numTarredFiles = 0; int numTarredFiles = 0;
File tarFile = new File(tarFileFullPath); File tarFile = new File(tarFileFullPath);
try ( TarArchiveOutputStream taos = new TarArchiveOutputStream(Files.newOutputStream(tarFile.toPath())) ) try ( TarArchiveOutputStream taos = new TarArchiveOutputStream(new BufferedOutputStream(Files.newOutputStream(tarFile.toPath()), tenMb)) )
{ {
for ( String fileName : filesToTar ) { for ( String fileName : filesToTar ) {
if ( addTarEntry(taos, fileName, baseDir) ) if ( addTarEntry(taos, fileName, baseDir) )
@ -79,11 +85,11 @@ public class FilesCompressor {
{ {
boolean shouldCloseEntry = false; // Useful in order to know if we should close the entry (an Exception may appear, and so we should not try to close it). boolean shouldCloseEntry = false; // Useful in order to know if we should close the entry (an Exception may appear, and so we should not try to close it).
String fullFileName = baseDir + fileName; Path fullFileNamePath = Paths.get(baseDir + fileName);
try ( FileInputStream fis = new FileInputStream(fullFileName) ) try ( BufferedInputStream fis = new BufferedInputStream(Files.newInputStream(fullFileNamePath), tenMb) )
{ {
TarArchiveEntry entry = new TarArchiveEntry(fileName); TarArchiveEntry entry = new TarArchiveEntry(fileName);
entry.setSize(Files.size(Paths.get(fullFileName))); // Yes, tar requires that we set the size beforehand.. entry.setSize(Files.size(fullFileNamePath)); // Yes, tar requires that we set the size beforehand..
taos.putArchiveEntry(entry); taos.putArchiveEntry(entry);
shouldCloseEntry = true; shouldCloseEntry = true;