dnet-core/dnet-data-services/src/main/java/eu/dnetlib/data/collector/plugins/archive/zip/ZipIterable.java

49 lines
1.4 KiB
Java

package eu.dnetlib.data.collector.plugins.archive.zip;
import java.io.File;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Iterator;
import com.google.common.base.Function;
import com.google.common.collect.Iterators;
import eu.dnetlib.data.collector.plugins.oai.engine.XmlCleaner;
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
import eu.dnetlib.data.collector.rmi.InterfaceDescriptor;
/**
 * {@link Iterable} over the records read from a .zip archive.
 *
 * <p>The archive is located through the path component of the base URL carried by the
 * {@link InterfaceDescriptor} passed to the constructor. Every record produced by the underlying
 * {@code ZipIterator} has a leading byte order mark (U+FEFF) removed, if present, and is then passed
 * through {@code XmlCleaner.cleanAllEntities}.
 *
 * @author Andrea
 *
 */
public class ZipIterable implements Iterable<String> {

    /** The .zip archive; its existence is checked once, at construction time. */
    private final File zipFile;

    /**
     * Resolves the archive from the base URL of the given interface descriptor and checks that it exists.
     *
     * @param interfaceDescriptor
     *            descriptor whose base URL points at the .zip archive
     * @throws CollectorServiceException
     *             if the base URL is malformed, or if the file denoted by its path does not exist
     */
    public ZipIterable(final InterfaceDescriptor interfaceDescriptor) throws CollectorServiceException {
        final File archive;
        try {
            final URL zipUrl = new URL(interfaceDescriptor.getBaseUrl());
            // Only the path component is used; protocol and host of the URL are ignored.
            // NOTE(review): URL#getPath() does not decode percent-escapes, so a base URL containing
            // e.g. "%20" is looked up literally on disk - confirm whether callers rely on that.
            archive = new File(zipUrl.getPath());
        } catch (MalformedURLException e) {
            throw new CollectorServiceException("Zip collector failed! ", e);
        }
        if (!archive.exists()) {
            throw new CollectorServiceException(String.format("The base URL %s, does not exist", archive.getPath()));
        }
        this.zipFile = archive;
    }

    /**
     * Creates a new iterator over the archive. Each call builds a fresh {@code ZipIterator} on the
     * absolute path of the archive.
     *
     * @return an iterator yielding each record with a leading BOM stripped and entities cleaned
     */
    @Override
    public Iterator<String> iterator() {
        final ZipIterator zipIterator = new ZipIterator(zipFile.getAbsolutePath());
        return Iterators.transform(zipIterator, new Function<String, String>() {

            @Override
            public String apply(final String inputRecord) {
                // Drop a leading byte order mark (U+FEFF) before cleaning the record.
                final String withoutBom = inputRecord.startsWith("\uFEFF") ? inputRecord.substring(1) : inputRecord;
                return XmlCleaner.cleanAllEntities(withoutBom);
            }
        });
    }
}