package eu.dnetlib.data.collector.plugins.archive.targz;

import java.io.BufferedInputStream;
import java.io.Closeable;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.Iterator;
import java.util.NoSuchElementException;
import java.util.zip.GZIPInputStream;

import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

/**
 * Iterates over the regular-file entries of a tar.gz archive, returning the content of each entry as a String.
 * <p>
 * Non-file entries (e.g. directories) are skipped. Problems opening or reading the archive are logged and end the iteration instead of
 * being propagated: after such an error {@link #hasNext()} returns {@code false}.
 * </p>
 * <p>
 * The underlying stream is closed automatically once the iteration is exhausted or fails; call {@link #close()} to release it when the
 * iteration is abandoned early.
 * </p>
 * <p>
 * Only the internal read-ahead step is synchronized; {@link #hasNext()} and {@link #next()} are not, so an instance should not be shared
 * between threads without external synchronization.
 * </p>
 */
public class TarGzIterator implements Iterator<String>, Closeable {

	/** The Constant log. */
	private static final Log log = LogFactory.getLog(TarGzIterator.class);

	/** Stream over the archive; null when the archive could not be opened or has already been closed. */
	private TarArchiveInputStream tarInputStream;

	/** Content of the entry that the next call to {@link #next()} will return; null when there is none. */
	private String current;

	/**
	 * Opens the archive at the given path and reads ahead to its first file entry.
	 *
	 * @param tarGzPath
	 *            path of the tar.gz file
	 */
	public TarGzIterator(final String tarGzPath) {
		open(new File(tarGzPath), tarGzPath);
	}

	/**
	 * Opens the given archive and reads ahead to its first file entry.
	 *
	 * @param tarGzFile
	 *            the tar.gz file
	 */
	public TarGzIterator(final File tarGzFile) {
		open(tarGzFile, tarGzFile.getAbsolutePath());
	}

	/**
	 * @return true if another file entry is available
	 */
	@Override
	public boolean hasNext() {
		return current != null;
	}

	/**
	 * Returns the content of the current file entry and reads ahead to the following one.
	 *
	 * @return the content of the entry
	 * @throws NoSuchElementException
	 *             if there are no more entries
	 */
	@Override
	public String next() {
		if (current == null) { throw new NoSuchElementException("No more entries in the tar.gz archive"); }
		final String ret = current;
		current = findNext();
		return ret;
	}

	/**
	 * Does nothing: entries cannot be removed from the archive. Kept as a silent no-op for backward compatibility.
	 */
	@Override
	public void remove() {}

	/**
	 * Ends the iteration and releases the underlying stream. Safe to call more than once.
	 */
	@Override
	public synchronized void close() {
		current = null;
		closeQuietly(tarInputStream);
		tarInputStream = null;
	}

	/** Opens the archive and pre-loads the first entry; failures are logged and leave the iterator empty. */
	private void open(final File source, final String displayName) {
		FileInputStream fileStream = null;
		try {
			fileStream = new FileInputStream(source);
			this.tarInputStream = new TarArchiveInputStream(new BufferedInputStream(new GZIPInputStream(fileStream)));
			this.current = findNext();
		} catch (FileNotFoundException e) {
			log.error("Tar.gz file not found: " + displayName, e);
		} catch (IOException e) {
			log.error("Problem opening tar.gz file " + displayName, e);
			// The gzip header could not be read: the raw file stream is not wrapped by anything that would close it.
			closeQuietly(fileStream);
		}
	}

	/** Reads the next file entry; closes the stream when the archive is exhausted or an error ended the iteration. */
	private synchronized String findNext() {
		if (tarInputStream == null) { return null; }
		final String content = readNextFile();
		if (content == null) {
			closeQuietly(tarInputStream);
			tarInputStream = null;
		}
		return content;
	}

	/** Skips to the next regular-file entry and returns its content, or null at end of archive or on error. */
	private String readNextFile() {
		TarArchiveEntry entry;
		try {
			entry = tarInputStream.getNextTarEntry();
			while ((entry != null) && !entry.isFile()) {
				log.debug("Skipping TAR entry " + entry.getName());
				entry = tarInputStream.getNextTarEntry();
			}
		} catch (IOException e) {
			log.error("Error during tar.gz extraction", e);
			// Do not fall through: 'entry' could still reference the last skipped (non-file) entry.
			return null;
		}
		if (entry == null) { return null; }

		log.debug("Extracting " + entry.getName());
		final long size = entry.getSize();
		if ((size < 0) || (size > Integer.MAX_VALUE)) {
			log.error("Impossible to extract file " + entry.getName() + ": unsupported entry size " + size);
			return null;
		}
		final byte[] content = new byte[(int) size];
		try {
			// A single read() may return fewer bytes than requested: keep reading until the entry is complete.
			int filled = 0;
			while (filled < content.length) {
				final int n = tarInputStream.read(content, filled, content.length - filled);
				if (n < 0) {
					break;
				}
				filled += n;
			}
			if (filled < content.length) {
				log.warn("Entry " + entry.getName() + " is shorter than declared: read " + filled + " of " + content.length + " bytes");
			}
			// NOTE(review): decodes with the platform default charset, as before; confirm whether a fixed charset (e.g. UTF-8) is expected.
			return new String(content, 0, filled);
		} catch (IOException e) {
			log.error("Impossible to extract file " + entry.getName(), e);
			return null;
		}
	}

	/** Closes the given resource, logging (not propagating) any failure; null is ignored. */
	private static void closeQuietly(final Closeable resource) {
		if (resource == null) { return; }
		try {
			resource.close();
		} catch (IOException e) {
			log.warn("Error closing tar.gz stream", e);
		}
	}
}