57 lines
2.3 KiB
Java
57 lines
2.3 KiB
Java
package eu.openaire.urls_controller.util;
|
|
|
|
import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
|
|
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
|
|
import org.apache.commons.compress.compressors.zstandard.ZstdCompressorInputStream;
|
|
import org.apache.commons.lang.StringUtils;
|
|
import org.slf4j.Logger;
|
|
import org.slf4j.LoggerFactory;
|
|
import org.springframework.stereotype.Component;
|
|
|
|
import java.io.BufferedInputStream;
|
|
import java.io.OutputStream;
|
|
import java.nio.file.Files;
|
|
import java.nio.file.Path;
|
|
import java.nio.file.Paths;
|
|
import java.nio.file.StandardCopyOption;
|
|
|
|
@Component
|
|
public class FileDecompressor {
|
|
|
|
private static final Logger logger = LoggerFactory.getLogger(FileDecompressor.class);
|
|
|
|
|
|
public void decompressFiles(String zstdSource, Path targetDir) throws Exception
|
|
{
|
|
// Decompress the zstd file.
|
|
Path tarPath = Paths.get(StringUtils.replace(zstdSource, ".zstd", "", 1)); // Remove the ".zstd" extension.
|
|
|
|
try ( ZstdCompressorInputStream zsIn = new ZstdCompressorInputStream(new BufferedInputStream(Files.newInputStream(Paths.get(zstdSource))));
|
|
OutputStream out = Files.newOutputStream(tarPath) )
|
|
{
|
|
final byte[] buffer = new byte[1048576]; // 1 Mb
|
|
int n = 0;
|
|
while ( (n = zsIn.read(buffer)) != -1 ) {
|
|
out.write(buffer, 0, n);
|
|
}
|
|
}
|
|
|
|
// Now we have a decompressed tar-file, which we will Un-tar, in order to extract the full-text files.
|
|
try ( TarArchiveInputStream tarInput = new TarArchiveInputStream(new BufferedInputStream(Files.newInputStream(tarPath))) )
|
|
{
|
|
TarArchiveEntry entry;
|
|
while ( ((entry = (TarArchiveEntry) tarInput.getNextEntry()) != null) )
|
|
{
|
|
String entryName = entry.getName();
|
|
Path targetFilePath = targetDir.resolve(entryName);
|
|
Files.copy(tarInput, targetFilePath, StandardCopyOption.REPLACE_EXISTING); // Copy an individual entry.
|
|
// No need to close the tarEntry.
|
|
}
|
|
}
|
|
|
|
// Now we have a batch-directory which contains the tar-file along with the extracted full-text files.
|
|
// After uploading the full-texts, the batch-directories will be deleted.
|
|
}
|
|
|
|
}
|