AriadnePlus/dnet-ariadneplus/src/main/java/eu/dnetlib/data/collector/plugins/ariadneplus/ads/FolderCollectorPlugin.java

67 lines
2.3 KiB
Java

package eu.dnetlib.data.collector.plugins.ariadneplus.ads;
import java.io.File;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import com.google.common.collect.Iterators;
import eu.dnetlib.data.collector.plugins.filesystem.FileSystemIterator;
import eu.dnetlib.rmi.data.CollectorServiceException;
import eu.dnetlib.rmi.data.InterfaceDescriptor;
/**
 * Collector plugin that walks a local folder and collects the records of each file found in it.
 *
 * <p>The folder is taken from the base URL of the interface descriptor. Every path returned by
 * {@link FileSystemIterator} (constructed with the {@code "json"} argument, presumably an extension
 * filter - confirm against that class) is handed to the parent {@link FileJSONCollectorPlugin}, and
 * the per-file record iterators are concatenated into a single one.
 *
 * <p>All per-call state is kept in local variables, so the result of one {@code collect} call never
 * depends on a previous call.
 */
public class FolderCollectorPlugin extends FileJSONCollectorPlugin {

    /** The Constant log. */
    private static final Log log = LogFactory.getLog(FolderCollectorPlugin.class);

    /** Prefix prepended to plain filesystem paths to turn them into file URLs. */
    private static final String FILE_URL_PREFIX = "file://";

    /**
     * Collects the records of all files found under the folder named by the descriptor's base URL.
     *
     * <p>A file whose collection fails with a {@link CollectorServiceException} is logged and skipped;
     * the remaining files are still collected. If no file is collected, the returned iterable wraps an
     * empty iterator.
     *
     * @param interfaceDescriptor descriptor whose base URL is the path of the folder to scan; its base
     *        URL is temporarily replaced while delegating to the parent plugin and restored before
     *        this method returns
     * @param fromDate passed through unchanged to the parent plugin
     * @param untilDate passed through unchanged to the parent plugin
     * @return an {@code ADSIterable} over the concatenation of the per-file record iterators
     * @throws CollectorServiceException if the base URL is null or empty, cannot be parsed as a file
     *         URL, or names a path that does not exist
     */
    @Override
    public Iterable<String> collect(final InterfaceDescriptor interfaceDescriptor, final String fromDate, final String untilDate)
            throws CollectorServiceException {
        final String baseUrl = interfaceDescriptor.getBaseUrl();
        if (baseUrl == null || baseUrl.isEmpty()) {
            throw new CollectorServiceException("Param 'baseurl' is null or empty");
        }

        final File baseDir = resolveBaseDir(baseUrl);
        log.debug("Start collecting json from folder " + baseDir + " ...");

        // One iterator per successfully opened file; concatenated once at the end so the result is a
        // flat chain (and simply empty when nothing was collected) instead of a deeply nested one.
        final List<Iterator<String>> perFileIterators = new ArrayList<Iterator<String>>();
        final FileSystemIterator fsi = new FileSystemIterator(baseDir.getAbsolutePath(), "json");
        try {
            while (fsi.hasNext()) {
                final String nextFilePath = fsi.next();
                // The parent plugin takes its input location from the descriptor, so point it at the
                // current file. NOTE(review): this relies on the parent reading the base URL during
                // collect()/iterator() rather than lazily during iteration - confirm.
                interfaceDescriptor.setBaseUrl(FILE_URL_PREFIX.concat(nextFilePath));
                try {
                    log.debug("Add iterator from " + nextFilePath);
                    perFileIterators.add(super.collect(interfaceDescriptor, fromDate, untilDate).iterator());
                } catch (CollectorServiceException e) {
                    // Best effort: a single unreadable file must not abort the whole folder.
                    log.error("Failed collecting json from path: " + nextFilePath, e);
                }
            }
        } finally {
            // Do not leave the caller's descriptor pointing at the last visited file.
            interfaceDescriptor.setBaseUrl(baseUrl);
        }
        return new ADSIterable(Iterators.concat(perFileIterators.iterator()));
    }

    /**
     * Turns the configured base URL into the folder to scan.
     *
     * @param baseUrl non-empty filesystem path taken from the interface descriptor
     * @return the existing file or folder the path refers to
     * @throws CollectorServiceException if the path cannot be parsed as a file URL or does not exist
     */
    private File resolveBaseDir(final String baseUrl) throws CollectorServiceException {
        final String url = FILE_URL_PREFIX.concat(baseUrl);
        final URL basePath;
        try {
            basePath = new URL(url);
        } catch (MalformedURLException mue) {
            log.error("Failed collecting json from base url " + url, mue);
            throw new CollectorServiceException(mue);
        }
        final File baseDir = new File(basePath.getPath());
        if (!baseDir.exists()) {
            throw new CollectorServiceException(String.format("The base URL %s, does not exist", basePath.getPath()));
        }
        return baseDir;
    }
}