package eu.openaire.urls_controller.util; import org.apache.commons.compress.archivers.tar.TarArchiveEntry; import org.apache.commons.compress.archivers.tar.TarArchiveInputStream; import org.apache.commons.compress.compressors.zstandard.ZstdCompressorInputStream; import org.apache.commons.lang.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.stereotype.Component; import java.io.BufferedInputStream; import java.io.BufferedOutputStream; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; @Component public class FileDecompressor { private static final Logger logger = LoggerFactory.getLogger(FileDecompressor.class); public void decompressFiles(String zstdSource, Path targetDir) throws Exception { // Decompress the zstd file. Path tarPath = Paths.get(StringUtils.replace(zstdSource, ".zstd", "", 1)); // Remove the ".zstd" extension. int readByte = -1; try ( ZstdCompressorInputStream zsIn = new ZstdCompressorInputStream(new BufferedInputStream(Files.newInputStream(Paths.get(zstdSource)), FileUtils.tenMb)); BufferedOutputStream out = new BufferedOutputStream(Files.newOutputStream(tarPath), FileUtils.tenMb) ) { while ( (readByte = zsIn.read()) != -1 ) out.write(readByte); } // Now we have a decompressed tar-file, which we will Un-tar, in order to extract the full-text files. try ( TarArchiveInputStream tarInput = new TarArchiveInputStream(new BufferedInputStream(Files.newInputStream(tarPath)), FileUtils.tenMb) ) { TarArchiveEntry entry; while ( ((entry = (TarArchiveEntry) tarInput.getNextEntry()) != null) ) { // Copy an individual entry. try ( BufferedOutputStream bufferedOutputStream = new BufferedOutputStream(Files.newOutputStream(targetDir.resolve(entry.getName())), FileUtils.tenMb) ) { while ( (readByte = tarInput.read()) != -1 ) bufferedOutputStream.write(readByte); } // The exception will be given to the caller method. // No need to close the tarEntry (no "close()" method is provided). } } // Now we have a batch-directory which contains the tar-file along with the extracted full-text files. // After uploading the full-texts, the batch-directories will be deleted. } }