87 lines
2.4 KiB
Java
87 lines
2.4 KiB
Java
package eu.dnetlib.data.collector.plugins.archive.targz;
|
|
|
|
import java.io.BufferedInputStream;
import java.io.Closeable;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.Iterator;
import java.util.NoSuchElementException;
import java.util.zip.GZIPInputStream;

import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
|
|
|
|
public class TarGzIterator implements Iterator<String> {
|
|
|
|
/** The Constant log. */
|
|
private static final Log log = LogFactory.getLog(TarGzIterator.class);
|
|
|
|
private TarArchiveInputStream tarInputStream;
|
|
private String current;
|
|
|
|
public TarGzIterator(final String tarGzPath) {
|
|
try {
|
|
this.tarInputStream = new TarArchiveInputStream(new BufferedInputStream(new GZIPInputStream(new FileInputStream(tarGzPath))));
|
|
this.current = findNext();
|
|
} catch (FileNotFoundException e) {
|
|
log.error("Tar.gz file not found: " + tarGzPath, e);
|
|
} catch (IOException e) {
|
|
log.error("Problem opening tar.gz file " + tarGzPath, e);
|
|
}
|
|
}
|
|
|
|
public TarGzIterator(final File tarGzFile) {
|
|
try {
|
|
this.tarInputStream = new TarArchiveInputStream(new BufferedInputStream(new GZIPInputStream(new FileInputStream(tarGzFile))));
|
|
this.current = findNext();
|
|
} catch (FileNotFoundException e) {
|
|
log.error("Tar.gz file not found: " + tarGzFile.getAbsolutePath(), e);
|
|
} catch (IOException e) {
|
|
log.error("Problem opening tar.gz file " + tarGzFile.getAbsolutePath(), e);
|
|
}
|
|
}
|
|
|
|
@Override
|
|
public boolean hasNext() {
|
|
return current != null;
|
|
}
|
|
|
|
@Override
|
|
public String next() {
|
|
String ret = new String(current);
|
|
current = findNext();
|
|
return ret;
|
|
}
|
|
|
|
@Override
|
|
public void remove() {}
|
|
|
|
private synchronized String findNext() {
|
|
TarArchiveEntry entry = null;
|
|
try {
|
|
while (null != (entry = tarInputStream.getNextTarEntry()) && !entry.isFile()) {
|
|
log.debug("Skipping TAR entry " + entry.getName());
|
|
}
|
|
} catch (IOException e) {
|
|
log.error("Error during tar.gz extraction", e);
|
|
}
|
|
|
|
if (entry == null) {
|
|
return null;
|
|
} else {
|
|
log.debug("Extracting " + entry.getName());
|
|
byte[] content = new byte[(int) entry.getSize()];
|
|
try {
|
|
tarInputStream.read(content, 0, content.length);
|
|
return new String(content);
|
|
} catch (IOException e) {
|
|
log.error("Impossible to extract file " + entry.getName(), e);
|
|
return null;
|
|
}
|
|
|
|
}
|
|
}
|
|
}
|