package eu.dnetlib.data.collector.plugins.archive.zip;

import java.io.File;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Iterator;

import com.google.common.base.Function;
import com.google.common.collect.Iterators;

import eu.dnetlib.data.collector.plugins.oai.engine.XmlCleaner;
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
import eu.dnetlib.data.collector.rmi.InterfaceDescriptor;

/**
 * Iterable over the records produced by a {@link ZipIterator} for a .zip archive whose
 * location is given by the base URL of an {@link InterfaceDescriptor}.
 *
 * <p>Each call to {@link #iterator()} creates a new {@link ZipIterator} on the archive. Every
 * record it yields has a leading byte-order mark removed (if present) and is then passed
 * through {@link XmlCleaner#cleanAllEntities(String)}.
 *
 * @author Andrea
 *
 */
public class ZipIterable implements Iterable<String> {

    /** Unicode byte-order mark that may prefix a record and must be dropped before cleaning. */
    private static final String BYTE_ORDER_MARK = "\uFEFF";

    /** The path to .zip archive. */
    private final File zipFile;

    /**
     * Resolves the archive location from the descriptor's base URL and checks that it exists.
     *
     * <p>Only the path component of the URL is used to build the {@link File}; the path is taken
     * as-is from {@link URL#getPath()}.
     *
     * @param interfaceDescriptor
     *            descriptor whose base URL points to the .zip archive
     * @throws CollectorServiceException
     *             if the base URL is malformed or the resolved file does not exist
     */
    public ZipIterable(final InterfaceDescriptor interfaceDescriptor) throws CollectorServiceException {
        final File archive;
        try {
            final String zipPath = interfaceDescriptor.getBaseUrl();
            final URL zipUrl = new URL(zipPath);
            archive = new File(zipUrl.getPath());
        } catch (MalformedURLException e) {
            throw new CollectorServiceException("Zip collector failed! ", e);
        }
        if (!archive.exists()) {
            throw new CollectorServiceException(String.format("The base URL %s, does not exist", archive.getPath()));
        }
        this.zipFile = archive;
    }

    /**
     * Returns a fresh iterator over the records of the archive.
     *
     * @return an iterator whose elements are the records yielded by {@link ZipIterator}, with a
     *         leading byte-order mark stripped and entities cleaned by {@link XmlCleaner}
     */
    @Override
    public Iterator<String> iterator() {
        final ZipIterator zipIterator = new ZipIterator(zipFile.getAbsolutePath());
        return Iterators.transform(zipIterator, new Function<String, String>() {

            @Override
            public String apply(final String inputRecord) {
                // Drop the BOM first so that the cleaner receives the record text only.
                final String withoutBom = inputRecord.startsWith(BYTE_ORDER_MARK) ? inputRecord.substring(1) : inputRecord;
                return XmlCleaner.cleanAllEntities(withoutBom);
            }
        });
    }
}