Improve performance when downloading and decompressing the full-texts archive.

This commit is contained in:
Lampros Smyrnaios 2023-03-02 17:44:53 +02:00
parent 62a4279e3b
commit 7b217764e0
2 changed files with 21 additions and 31 deletions

View File

@ -9,11 +9,10 @@ import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Component;
import java.io.BufferedInputStream;
import java.io.OutputStream;
import java.io.BufferedOutputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardCopyOption;
@Component
public class FileDecompressor {
@ -25,26 +24,25 @@ public class FileDecompressor {
{
// Decompress the zstd file.
Path tarPath = Paths.get(StringUtils.replace(zstdSource, ".zstd", "", 1)); // Remove the ".zstd" extension.
int readByte = -1;
try ( ZstdCompressorInputStream zsIn = new ZstdCompressorInputStream(new BufferedInputStream(Files.newInputStream(Paths.get(zstdSource))));
OutputStream out = Files.newOutputStream(tarPath) )
try ( ZstdCompressorInputStream zsIn = new ZstdCompressorInputStream(new BufferedInputStream(Files.newInputStream(Paths.get(zstdSource)), FileUtils.tenMb));
BufferedOutputStream out = new BufferedOutputStream(Files.newOutputStream(tarPath), FileUtils.tenMb) )
{
final byte[] buffer = new byte[1048576]; // 1 Mb
int n = 0;
while ( (n = zsIn.read(buffer)) != -1 ) {
out.write(buffer, 0, n);
}
while ( (readByte = zsIn.read()) != -1 )
out.write(readByte);
}
// Now we have a decompressed tar-file, which we will Un-tar, in order to extract the full-text files.
try ( TarArchiveInputStream tarInput = new TarArchiveInputStream(new BufferedInputStream(Files.newInputStream(tarPath))) )
try ( TarArchiveInputStream tarInput = new TarArchiveInputStream(new BufferedInputStream(Files.newInputStream(tarPath)), FileUtils.tenMb) )
{
TarArchiveEntry entry;
while ( ((entry = (TarArchiveEntry) tarInput.getNextEntry()) != null) )
{
String entryName = entry.getName();
Path targetFilePath = targetDir.resolve(entryName);
Files.copy(tarInput, targetFilePath, StandardCopyOption.REPLACE_EXISTING); // Copy an individual entry.
// Copy an individual entry.
BufferedOutputStream bufferedOutputStream = new BufferedOutputStream(Files.newOutputStream(targetDir.resolve(entry.getName())), FileUtils.tenMb);
while ( (readByte = tarInput.read()) != -1 )
bufferedOutputStream.write(readByte);
// No need to close the tarEntry.
}
}

View File

@ -484,29 +484,21 @@ public class FileUtils {
}
public boolean saveArchive(HttpURLConnection conn, File zstdFile) {
InputStream inStream = null;
FileOutputStream outStream = null;
try {
inStream = conn.getInputStream();
outStream = new FileOutputStream(zstdFile);
int readByte;
while ( (readByte = inStream.read()) != -1 ) {
outStream.write(readByte);
public static final int tenMb = 10 * 1_048_576;
public boolean saveArchive(HttpURLConnection conn, File zstdFile)
{
try ( BufferedInputStream inStream = new BufferedInputStream(conn.getInputStream(), tenMb);
BufferedOutputStream outStream = new BufferedOutputStream(Files.newOutputStream(zstdFile.toPath()), tenMb) )
{
int readBytes;
while ( (readBytes = inStream.read()) != -1 ) {
outStream.write(readBytes);
}
return true;
} catch (Exception e) {
logger.error("Could not save the zstd file \"" + zstdFile.getName() + "\": " + e.getMessage(), e);
return false;
} finally {
try {
if ( inStream != null )
inStream.close();
if ( outStream != null )
outStream.close();
} catch (Exception e) {
logger.error(e.getMessage(), e);
}
}
}