49 lines
1.4 KiB
Java
49 lines
1.4 KiB
Java
package eu.dnetlib.data.collector.plugins.archive.zip;
|
|
|
|
import java.io.File;
|
|
import java.net.MalformedURLException;
|
|
import java.net.URL;
|
|
import java.util.Iterator;
|
|
|
|
import com.google.common.base.Function;
|
|
import com.google.common.collect.Iterators;
|
|
|
|
import eu.dnetlib.data.collector.plugins.oai.engine.XmlCleaner;
|
|
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
|
|
import eu.dnetlib.data.collector.rmi.InterfaceDescriptor;
|
|
|
|
/**
|
|
*
|
|
* @author Andrea
|
|
*
|
|
*/
|
|
public class ZipIterable implements Iterable<String> {
|
|
|
|
/** The path to .zip archive. */
|
|
private File zipFile;
|
|
|
|
public ZipIterable(final InterfaceDescriptor interfaceDescriptor) throws CollectorServiceException {
|
|
try {
|
|
final String zipPath = interfaceDescriptor.getBaseUrl();
|
|
URL zipUrl = new URL(zipPath);
|
|
this.zipFile = new File(zipUrl.getPath());
|
|
if (!zipFile.exists()) { throw new CollectorServiceException(String.format("The base ULR %s, does not exist", zipFile.getPath())); }
|
|
} catch (MalformedURLException e) {
|
|
throw new CollectorServiceException("Zip collector failed! ", e);
|
|
}
|
|
}
|
|
|
|
@Override
|
|
public Iterator<String> iterator() {
|
|
final ZipIterator zipIterator = new ZipIterator(zipFile.getAbsolutePath());
|
|
return Iterators.transform(zipIterator, new Function<String, String>() {
|
|
|
|
@Override
|
|
public String apply(final String inputRecord) {
|
|
return XmlCleaner.cleanAllEntities(inputRecord.startsWith("\uFEFF") ? inputRecord.substring(1) : inputRecord);
|
|
}
|
|
});
|
|
}
|
|
|
|
}
|