41 lines
1.3 KiB
Java
41 lines
1.3 KiB
Java
|
|
package eu.dnetlib.dhp.common.collection;
|
|
|
|
import java.io.BufferedOutputStream;
|
|
import java.io.IOException;
|
|
import java.util.zip.GZIPOutputStream;
|
|
|
|
import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
|
|
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
|
|
import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
|
|
import org.apache.commons.io.IOUtils;
|
|
import org.apache.hadoop.fs.FSDataInputStream;
|
|
import org.apache.hadoop.fs.FSDataOutputStream;
|
|
import org.apache.hadoop.fs.FileSystem;
|
|
import org.apache.hadoop.fs.Path;
|
|
|
|
public class DecompressTarGz {
|
|
|
|
public static void doExtract(FileSystem fs, String outputPath, String tarGzPath) throws IOException {
|
|
|
|
FSDataInputStream inputFileStream = fs.open(new Path(tarGzPath));
|
|
try (TarArchiveInputStream tais = new TarArchiveInputStream(
|
|
new GzipCompressorInputStream(inputFileStream))) {
|
|
TarArchiveEntry entry = null;
|
|
while ((entry = tais.getNextTarEntry()) != null) {
|
|
if (!entry.isDirectory()) {
|
|
try (
|
|
FSDataOutputStream out = fs
|
|
.create(new Path(outputPath.concat(entry.getName()).concat(".gz")));
|
|
GZIPOutputStream gzipOs = new GZIPOutputStream(new BufferedOutputStream(out))) {
|
|
|
|
IOUtils.copy(tais, gzipOs);
|
|
|
|
}
|
|
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|