package eu.openaire.urls_controller.util; import org.apache.commons.compress.archivers.tar.TarArchiveEntry; import org.apache.commons.compress.archivers.tar.TarArchiveInputStream; import org.apache.commons.compress.compressors.zstandard.ZstdCompressorInputStream; import org.apache.commons.lang.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.stereotype.Component; import java.io.BufferedInputStream; import java.io.OutputStream; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; import java.nio.file.StandardCopyOption; @Component public class FileDecompressor { private static final Logger logger = LoggerFactory.getLogger(FileDecompressor.class); public void decompressFiles(String zstdSource, Path targetDir) throws Exception { // Decompress the zstd file. Path tarPath = Paths.get(StringUtils.replace(zstdSource, ".zstd", "", 1)); // Remove the ".zstd" extension. try ( ZstdCompressorInputStream zsIn = new ZstdCompressorInputStream(new BufferedInputStream(Files.newInputStream(Paths.get(zstdSource)))); OutputStream out = Files.newOutputStream(tarPath) ) { final byte[] buffer = new byte[1048576]; // 1 Mb int n = 0; while ( (n = zsIn.read(buffer)) != -1 ) { out.write(buffer, 0, n); } } // Now we have a decompressed tar-file, which we will Un-tar, in order to extract the full-text files. try ( TarArchiveInputStream tarInput = new TarArchiveInputStream(new BufferedInputStream(Files.newInputStream(tarPath))) ) { TarArchiveEntry entry; while ( ((entry = (TarArchiveEntry) tarInput.getNextEntry()) != null) ) { String entryName = entry.getName(); Path targetFilePath = targetDir.resolve(entryName); Files.copy(tarInput, targetFilePath, StandardCopyOption.REPLACE_EXISTING); // Copy an individual entry. // No need to close the tarEntry. } } // Now we have a batch-directory which contains the tar-file along with the extracted full-text files. // After uploading the full-texts, the batch-directories will be deleted. } }