dnet-core/dnet-data-services/src/main/java/eu/dnetlib/data/collector/plugins/archive/targz/TarGzIterator.java

87 lines
2.4 KiB
Java

package eu.dnetlib.data.collector.plugins.archive.targz;
import java.io.BufferedInputStream;
import java.io.Closeable;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.Iterator;
import java.util.NoSuchElementException;
import java.util.zip.GZIPInputStream;

import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
/**
 * Iterates over the regular-file entries of a tar.gz archive, returning the content of each entry as a String.
 * Non-file entries (for example directories) are skipped.
 *
 * <p>
 * Errors never propagate to the caller as checked exceptions: if the archive cannot be opened, or an entry cannot be
 * read, the problem is logged and the iteration simply ends ({@link #hasNext()} returns {@code false}).
 * </p>
 *
 * <p>
 * The underlying stream is closed as soon as the archive is exhausted or an error occurs. A caller that abandons the
 * iteration before {@link #hasNext()} returns {@code false} leaves the stream open.
 * </p>
 */
public class TarGzIterator implements Iterator<String> {

    /** The Constant log. */
    private static final Log log = LogFactory.getLog(TarGzIterator.class);

    /** The open archive stream; {@code null} when the archive could not be opened or has already been closed. */
    private TarArchiveInputStream tarInputStream;

    /** Content of the entry returned by the next call to {@link #next()}; {@code null} when there is none. */
    private String current;

    /**
     * Opens the archive at the given path and pre-loads its first file entry.
     *
     * @param tarGzPath
     *            path of the tar.gz file
     */
    public TarGzIterator(final String tarGzPath) {
        this.tarInputStream = open(new File(tarGzPath), tarGzPath);
        this.current = findNext();
    }

    /**
     * Opens the given archive and pre-loads its first file entry.
     *
     * @param tarGzFile
     *            the tar.gz file
     */
    public TarGzIterator(final File tarGzFile) {
        this.tarInputStream = open(tarGzFile, tarGzFile.getAbsolutePath());
        this.current = findNext();
    }

    /**
     * @return {@code true} if another file entry has been read and is waiting to be returned
     */
    @Override
    public boolean hasNext() {
        return current != null;
    }

    /**
     * Returns the content of the current entry and pre-loads the following one.
     *
     * @return the content of the next file entry
     * @throws NoSuchElementException
     *             if the iteration has no more entries
     */
    @Override
    public String next() {
        if (current == null) {
            throw new NoSuchElementException("No more entries in the tar.gz archive");
        }
        final String ret = current;
        current = findNext();
        return ret;
    }

    /** Removal is not supported: this method intentionally does nothing. */
    @Override
    public void remove() {}

    /**
     * Opens a tar stream on top of the gzip-compressed file.
     *
     * @param file
     *            the tar.gz file
     * @param label
     *            how the file is named in log messages
     * @return the opened stream, or {@code null} if the file is missing or cannot be read as gzip (the failure is
     *         logged)
     */
    private static TarArchiveInputStream open(final File file, final String label) {
        FileInputStream fileStream = null;
        try {
            fileStream = new FileInputStream(file);
            return new TarArchiveInputStream(new BufferedInputStream(new GZIPInputStream(fileStream)));
        } catch (FileNotFoundException e) {
            log.error("Tar.gz file not found: " + label, e);
        } catch (IOException e) {
            log.error("Problem opening tar.gz file " + label, e);
            // The gzip header could not be read: release the file handle that was already opened.
            closeQuietly(fileStream);
        }
        return null;
    }

    /**
     * Advances to the next regular-file entry and reads its whole content.
     *
     * @return the content of the next file entry, or {@code null} when the archive is exhausted, was never opened, or
     *         an error occurred (in which case the stream is closed and the iteration ends)
     */
    private synchronized String findNext() {
        if (tarInputStream == null) {
            return null;
        }

        TarArchiveEntry entry;
        try {
            while (null != (entry = tarInputStream.getNextTarEntry()) && !entry.isFile()) {
                log.debug("Skipping TAR entry " + entry.getName());
            }
        } catch (IOException e) {
            log.error("Error during tar.gz extraction", e);
            // Do not fall through with the last skipped (non-file) entry: the stream position is unreliable.
            close();
            return null;
        }

        if (entry == null) {
            // End of archive.
            close();
            return null;
        }

        log.debug("Extracting " + entry.getName());
        try {
            return readContent(entry);
        } catch (IOException e) {
            log.error("Impossible to extract file " + entry.getName(), e);
            close();
            return null;
        }
    }

    /**
     * Reads the full content of the entry the stream is currently positioned on.
     *
     * @param entry
     *            the current entry
     * @return the entry content
     * @throws IOException
     *             if the entry is too large to fit in a byte array, ends before the declared size, or cannot be read
     */
    private String readContent(final TarArchiveEntry entry) throws IOException {
        final long size = entry.getSize();
        if ((size < 0) || (size > Integer.MAX_VALUE)) {
            throw new IOException("Unsupported size " + size + " for TAR entry " + entry.getName());
        }

        final byte[] content = new byte[(int) size];
        int offset = 0;
        // A single read() may return fewer bytes than requested, so keep reading until the buffer is full.
        while (offset < content.length) {
            final int read = tarInputStream.read(content, offset, content.length - offset);
            if (read < 0) {
                throw new IOException("Unexpected end of TAR entry " + entry.getName() + ": expected " + content.length + " bytes, got " + offset);
            }
            offset += read;
        }
        // NOTE(review): decodes with the platform default charset, as before; confirm whether UTF-8 should be forced.
        return new String(content);
    }

    /** Closes the archive stream, if still open, and marks the iterator as exhausted. */
    private void close() {
        closeQuietly(tarInputStream);
        tarInputStream = null;
    }

    /**
     * Closes the given resource, logging (rather than propagating) any failure.
     *
     * @param resource
     *            the resource to close, may be {@code null}
     */
    private static void closeQuietly(final Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException e) {
            log.warn("Error closing tar.gz stream", e);
        }
    }
}