Improve performance when archiving and compressing the full-texts.
This commit is contained in:
parent
ff4fd3d289
commit
ba989484e4
|
@ -6,8 +6,12 @@ import org.apache.commons.compress.compressors.zstandard.ZstdCompressorOutputStr
|
|||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.io.*;
|
||||
import java.io.BufferedInputStream;
|
||||
import java.io.BufferedOutputStream;
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.Paths;
|
||||
import java.util.List;
|
||||
|
||||
|
@ -15,6 +19,9 @@ public class FilesCompressor {
|
|||
|
||||
private static final Logger logger = LoggerFactory.getLogger(FilesCompressor.class);
|
||||
|
||||
static final int tenMb = 10 * 1_048_576;
|
||||
|
||||
|
||||
public static File compressMultipleFilesIntoOne(long assignmentsCounter, int zipBatchCounter, List<String> filesToCompress, String baseDirectory)
|
||||
{
|
||||
// For example: assignments_2_full-texts_4.zip | where < 4 > refers to the 4th batch of files requested by the Controller.
|
||||
|
@ -30,13 +37,12 @@ public class FilesCompressor {
|
|||
String zStandardFileFullPath = tarFilePath + ".zstd";
|
||||
File zStandardFile = new File(zStandardFileFullPath);
|
||||
|
||||
try ( InputStream in = Files.newInputStream(Paths.get(tarFilePath));
|
||||
ZstdCompressorOutputStream zOut = new ZstdCompressorOutputStream(new BufferedOutputStream(Files.newOutputStream(zStandardFile.toPath()))) )
|
||||
try ( BufferedInputStream in = new BufferedInputStream(Files.newInputStream(Paths.get(tarFilePath)), tenMb);
|
||||
ZstdCompressorOutputStream zOut = new ZstdCompressorOutputStream(new BufferedOutputStream(Files.newOutputStream(zStandardFile.toPath())), tenMb) )
|
||||
{
|
||||
final byte[] buffer = new byte[1048576]; // 1 Mb
|
||||
int numBytes = 0;
|
||||
while ( (numBytes = in.read(buffer)) != -1 ) {
|
||||
zOut.write(buffer, 0, numBytes);
|
||||
int readByte;
|
||||
while ( (readByte = in.read()) != -1 ) {
|
||||
zOut.write(readByte);
|
||||
}
|
||||
} catch (Exception e) {
|
||||
logger.error("Exception when compressing the tar-archive: " + tarFilePath, e);
|
||||
|
@ -60,7 +66,7 @@ public class FilesCompressor {
|
|||
int numTarredFiles = 0;
|
||||
File tarFile = new File(tarFileFullPath);
|
||||
|
||||
try ( TarArchiveOutputStream taos = new TarArchiveOutputStream(Files.newOutputStream(tarFile.toPath())) )
|
||||
try ( TarArchiveOutputStream taos = new TarArchiveOutputStream(new BufferedOutputStream(Files.newOutputStream(tarFile.toPath()), tenMb)) )
|
||||
{
|
||||
for ( String fileName : filesToTar ) {
|
||||
if ( addTarEntry(taos, fileName, baseDir) )
|
||||
|
@ -79,11 +85,11 @@ public class FilesCompressor {
|
|||
{
|
||||
boolean shouldCloseEntry = false; // Useful in order to know if we should close the entry (an Exception may appear, and so we should not try to close it).
|
||||
|
||||
String fullFileName = baseDir + fileName;
|
||||
try ( FileInputStream fis = new FileInputStream(fullFileName) )
|
||||
Path fullFileNamePath = Paths.get(baseDir + fileName);
|
||||
try ( BufferedInputStream fis = new BufferedInputStream(Files.newInputStream(fullFileNamePath), tenMb) )
|
||||
{
|
||||
TarArchiveEntry entry = new TarArchiveEntry(fileName);
|
||||
entry.setSize(Files.size(Paths.get(fullFileName))); // Yes, tar requires that we set the size beforehand..
|
||||
entry.setSize(Files.size(fullFileNamePath)); // Yes, tar requires that we set the size beforehand..
|
||||
taos.putArchiveEntry(entry);
|
||||
shouldCloseEntry = true;
|
||||
|
||||
|
|
Loading…
Reference in New Issue