forked from lsmyrnaios/UrlsController
Improve performance when downloading and decompressing the full-texts archive.
This commit is contained in:
parent
62a4279e3b
commit
7b217764e0
|
@ -9,11 +9,10 @@ import org.slf4j.LoggerFactory;
|
|||
import org.springframework.stereotype.Component;
|
||||
|
||||
import java.io.BufferedInputStream;
|
||||
import java.io.OutputStream;
|
||||
import java.io.BufferedOutputStream;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.Paths;
|
||||
import java.nio.file.StandardCopyOption;
|
||||
|
||||
@Component
|
||||
public class FileDecompressor {
|
||||
|
@ -25,26 +24,25 @@ public class FileDecompressor {
|
|||
{
|
||||
// Decompress the zstd file.
|
||||
Path tarPath = Paths.get(StringUtils.replace(zstdSource, ".zstd", "", 1)); // Remove the ".zstd" extension.
|
||||
int readByte = -1;
|
||||
|
||||
try ( ZstdCompressorInputStream zsIn = new ZstdCompressorInputStream(new BufferedInputStream(Files.newInputStream(Paths.get(zstdSource))));
|
||||
OutputStream out = Files.newOutputStream(tarPath) )
|
||||
try ( ZstdCompressorInputStream zsIn = new ZstdCompressorInputStream(new BufferedInputStream(Files.newInputStream(Paths.get(zstdSource)), FileUtils.tenMb));
|
||||
BufferedOutputStream out = new BufferedOutputStream(Files.newOutputStream(tarPath), FileUtils.tenMb) )
|
||||
{
|
||||
final byte[] buffer = new byte[1048576]; // 1 Mb
|
||||
int n = 0;
|
||||
while ( (n = zsIn.read(buffer)) != -1 ) {
|
||||
out.write(buffer, 0, n);
|
||||
}
|
||||
while ( (readByte = zsIn.read()) != -1 )
|
||||
out.write(readByte);
|
||||
}
|
||||
|
||||
// We now have a decompressed tar file; untar it in order to extract the full-text files.
|
||||
try ( TarArchiveInputStream tarInput = new TarArchiveInputStream(new BufferedInputStream(Files.newInputStream(tarPath))) )
|
||||
try ( TarArchiveInputStream tarInput = new TarArchiveInputStream(new BufferedInputStream(Files.newInputStream(tarPath)), FileUtils.tenMb) )
|
||||
{
|
||||
TarArchiveEntry entry;
|
||||
while ( ((entry = (TarArchiveEntry) tarInput.getNextEntry()) != null) )
|
||||
{
|
||||
String entryName = entry.getName();
|
||||
Path targetFilePath = targetDir.resolve(entryName);
|
||||
Files.copy(tarInput, targetFilePath, StandardCopyOption.REPLACE_EXISTING); // Copy an individual entry.
|
||||
// Copy an individual entry.
|
||||
BufferedOutputStream bufferedOutputStream = new BufferedOutputStream(Files.newOutputStream(targetDir.resolve(entry.getName())), FileUtils.tenMb);
|
||||
while ( (readByte = tarInput.read()) != -1 )
|
||||
bufferedOutputStream.write(readByte);
|
||||
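// No need to close the tarEntry.
// NOTE(review): the per-entry BufferedOutputStream created above is not flushed or closed in the visible lines, so buffered bytes may never reach the target file — confirm and close it (e.g. with try-with-resources).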
|
||||
}
|
||||
}
|
||||
|
|
|
@ -484,29 +484,21 @@ public class FileUtils {
|
|||
}
|
||||
|
||||
|
||||
public boolean saveArchive(HttpURLConnection conn, File zstdFile) {
|
||||
InputStream inStream = null;
|
||||
FileOutputStream outStream = null;
|
||||
try {
|
||||
inStream = conn.getInputStream();
|
||||
outStream = new FileOutputStream(zstdFile);
|
||||
int readByte;
|
||||
while ( (readByte = inStream.read()) != -1 ) {
|
||||
outStream.write(readByte);
|
||||
public static final int tenMb = 10 * 1_048_576;
|
||||
|
||||
public boolean saveArchive(HttpURLConnection conn, File zstdFile)
|
||||
{
|
||||
try ( BufferedInputStream inStream = new BufferedInputStream(conn.getInputStream(), tenMb);
|
||||
BufferedOutputStream outStream = new BufferedOutputStream(Files.newOutputStream(zstdFile.toPath()), tenMb) )
|
||||
{
|
||||
int readBytes;
|
||||
while ( (readBytes = inStream.read()) != -1 ) {
|
||||
outStream.write(readBytes);
|
||||
}
|
||||
return true;
|
||||
} catch (Exception e) {
|
||||
logger.error("Could not save the zstd file \"" + zstdFile.getName() + "\": " + e.getMessage(), e);
|
||||
return false;
|
||||
} finally {
|
||||
try {
|
||||
if ( inStream != null )
|
||||
inStream.close();
|
||||
if ( outStream != null )
|
||||
outStream.close();
|
||||
} catch (Exception e) {
|
||||
logger.error(e.getMessage(), e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue