Improve performance when downloading and decompressing the full-texts archive.

This commit is contained in:
Lampros Smyrnaios 2023-03-02 17:44:53 +02:00
parent 62a4279e3b
commit 7b217764e0
2 changed files with 21 additions and 31 deletions

View File

@ -9,11 +9,10 @@ import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Component; import org.springframework.stereotype.Component;
import java.io.BufferedInputStream; import java.io.BufferedInputStream;
import java.io.OutputStream; import java.io.BufferedOutputStream;
import java.nio.file.Files; import java.nio.file.Files;
import java.nio.file.Path; import java.nio.file.Path;
import java.nio.file.Paths; import java.nio.file.Paths;
import java.nio.file.StandardCopyOption;
@Component @Component
public class FileDecompressor { public class FileDecompressor {
@ -25,26 +24,25 @@ public class FileDecompressor {
{ {
// Decompress the zstd file. // Decompress the zstd file.
Path tarPath = Paths.get(StringUtils.replace(zstdSource, ".zstd", "", 1)); // Remove the ".zstd" extension. Path tarPath = Paths.get(StringUtils.replace(zstdSource, ".zstd", "", 1)); // Remove the ".zstd" extension.
int readByte = -1;
try ( ZstdCompressorInputStream zsIn = new ZstdCompressorInputStream(new BufferedInputStream(Files.newInputStream(Paths.get(zstdSource)))); try ( ZstdCompressorInputStream zsIn = new ZstdCompressorInputStream(new BufferedInputStream(Files.newInputStream(Paths.get(zstdSource)), FileUtils.tenMb));
OutputStream out = Files.newOutputStream(tarPath) ) BufferedOutputStream out = new BufferedOutputStream(Files.newOutputStream(tarPath), FileUtils.tenMb) )
{ {
final byte[] buffer = new byte[1048576]; // 1 Mb while ( (readByte = zsIn.read()) != -1 )
int n = 0; out.write(readByte);
while ( (n = zsIn.read(buffer)) != -1 ) {
out.write(buffer, 0, n);
}
} }
// Now we have a decompressed tar-file, which we will Un-tar, in order to extract the full-text files. // Now we have a decompressed tar-file, which we will Un-tar, in order to extract the full-text files.
try ( TarArchiveInputStream tarInput = new TarArchiveInputStream(new BufferedInputStream(Files.newInputStream(tarPath))) ) try ( TarArchiveInputStream tarInput = new TarArchiveInputStream(new BufferedInputStream(Files.newInputStream(tarPath)), FileUtils.tenMb) )
{ {
TarArchiveEntry entry; TarArchiveEntry entry;
while ( ((entry = (TarArchiveEntry) tarInput.getNextEntry()) != null) ) while ( ((entry = (TarArchiveEntry) tarInput.getNextEntry()) != null) )
{ {
String entryName = entry.getName(); // Copy an individual entry.
Path targetFilePath = targetDir.resolve(entryName); BufferedOutputStream bufferedOutputStream = new BufferedOutputStream(Files.newOutputStream(targetDir.resolve(entry.getName())), FileUtils.tenMb);
Files.copy(tarInput, targetFilePath, StandardCopyOption.REPLACE_EXISTING); // Copy an individual entry. while ( (readByte = tarInput.read()) != -1 )
bufferedOutputStream.write(readByte);
// No need to close the tarEntry. // No need to close the tarEntry.
} }
} }

View File

@ -484,29 +484,21 @@ public class FileUtils {
} }
public boolean saveArchive(HttpURLConnection conn, File zstdFile) { public static final int tenMb = 10 * 1_048_576;
InputStream inStream = null;
FileOutputStream outStream = null; public boolean saveArchive(HttpURLConnection conn, File zstdFile)
try { {
inStream = conn.getInputStream(); try ( BufferedInputStream inStream = new BufferedInputStream(conn.getInputStream(), tenMb);
outStream = new FileOutputStream(zstdFile); BufferedOutputStream outStream = new BufferedOutputStream(Files.newOutputStream(zstdFile.toPath()), tenMb) )
int readByte; {
while ( (readByte = inStream.read()) != -1 ) { int readBytes;
outStream.write(readByte); while ( (readBytes = inStream.read()) != -1 ) {
outStream.write(readBytes);
} }
return true; return true;
} catch (Exception e) { } catch (Exception e) {
logger.error("Could not save the zstd file \"" + zstdFile.getName() + "\": " + e.getMessage(), e); logger.error("Could not save the zstd file \"" + zstdFile.getName() + "\": " + e.getMessage(), e);
return false; return false;
} finally {
try {
if ( inStream != null )
inStream.close();
if ( outStream != null )
outStream.close();
} catch (Exception e) {
logger.error(e.getMessage(), e);
}
} }
} }