all files under Access Protocol > Base URL folder are collected together
This commit is contained in:
parent
fc43b5a85d
commit
4047d32587
|
@ -1,27 +1,67 @@
|
||||||
package eu.dnetlib.data.collector.plugins.ariadneplus.ads;
|
package eu.dnetlib.data.collector.plugins.ariadneplus.ads;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.net.MalformedURLException;
|
||||||
|
import java.net.URL;
|
||||||
|
import java.util.Iterator;
|
||||||
|
|
||||||
|
import org.apache.commons.logging.Log;
|
||||||
|
import org.apache.commons.logging.LogFactory;
|
||||||
|
|
||||||
|
import com.google.common.collect.Iterators;
|
||||||
|
|
||||||
import eu.dnetlib.data.collector.plugins.FileCollectorPlugin;
|
import eu.dnetlib.data.collector.plugins.FileCollectorPlugin;
|
||||||
|
import eu.dnetlib.data.collector.plugins.filesystem.FileSystemIterator;
|
||||||
import eu.dnetlib.rmi.data.CollectorServiceException;
|
import eu.dnetlib.rmi.data.CollectorServiceException;
|
||||||
import eu.dnetlib.rmi.data.InterfaceDescriptor;
|
import eu.dnetlib.rmi.data.InterfaceDescriptor;
|
||||||
|
|
||||||
|
|
||||||
public class ADSCollectorPlugin extends FileCollectorPlugin {
|
public class ADSCollectorPlugin extends FileCollectorPlugin {
|
||||||
|
|
||||||
|
private Iterator<String> recordIterator;
|
||||||
|
private URL basePath;
|
||||||
|
|
||||||
|
/** The Constant log. */
|
||||||
|
private static final Log log = LogFactory.getLog(ADSCollectorPlugin.class);
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Iterable<String> collect(final InterfaceDescriptor interfaceDescriptor, final String fromDate, final String untilDate)
|
public Iterable<String> collect(final InterfaceDescriptor interfaceDescriptor, final String fromDate, final String untilDate)
|
||||||
throws CollectorServiceException {
|
throws CollectorServiceException {
|
||||||
|
|
||||||
final String baseUrl = interfaceDescriptor.getBaseUrl();
|
final String baseUrl = interfaceDescriptor.getBaseUrl();
|
||||||
|
if (baseUrl == null || baseUrl.isEmpty()) { throw new CollectorServiceException("Param 'baseurl' is null or empty"); }
|
||||||
final String suffixToUrl = interfaceDescriptor.getParams().get("suffixToBaseUrl");
|
String url = "file://".concat(baseUrl);
|
||||||
|
|
||||||
return () -> {
|
|
||||||
try {
|
try {
|
||||||
return new ADSIterator(super.collect(interfaceDescriptor, fromDate, untilDate).iterator(), baseUrl, suffixToUrl);
|
basePath = new URL(url);
|
||||||
} catch (CollectorServiceException e) {
|
} catch (MalformedURLException mue) {
|
||||||
throw new RuntimeException(e);
|
log.error("Failed collecting from base url " + url, mue);
|
||||||
|
throw new CollectorServiceException(mue);
|
||||||
}
|
}
|
||||||
} ;
|
|
||||||
|
File baseDir = new File(basePath.getPath());
|
||||||
|
if (!baseDir.exists()) { throw new CollectorServiceException(String.format("The base ULR %s, does not exist", basePath.getPath())); }
|
||||||
|
|
||||||
|
log.debug("Start collecting from folder " + baseDir + " ...");
|
||||||
|
final FileSystemIterator fsi = new FileSystemIterator(baseDir.getAbsolutePath(), "xml");
|
||||||
|
|
||||||
|
boolean emptyIterator = true;
|
||||||
|
while (fsi.hasNext()) {
|
||||||
|
String nextFilePath = fsi.next();
|
||||||
|
interfaceDescriptor.setBaseUrl("file://".concat(nextFilePath));
|
||||||
|
try {
|
||||||
|
log.debug("Add iterator from " + nextFilePath);
|
||||||
|
if (emptyIterator) {
|
||||||
|
recordIterator = new ADSIterator(super.collect(interfaceDescriptor, fromDate, untilDate).iterator(), null, null);
|
||||||
|
emptyIterator = false;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
recordIterator = Iterators.concat(recordIterator, new ADSIterator(super.collect(interfaceDescriptor, fromDate, untilDate).iterator(), null, null));
|
||||||
|
}
|
||||||
|
} catch (CollectorServiceException e) {
|
||||||
|
log.error("Failed collecting from path: " + nextFilePath, e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return new ADSIterable(recordIterator);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,41 @@
|
||||||
|
package eu.dnetlib.data.collector.plugins.ariadneplus.ads;
|
||||||
|
|
||||||
|
import java.util.Iterator;
|
||||||
|
|
||||||
|
import org.apache.commons.logging.Log;
|
||||||
|
import org.apache.commons.logging.LogFactory;
|
||||||
|
|
||||||
|
/**
 * The Class ADSIterable.
 *
 * Adapts an already-built record iterator to the {@link Iterable} interface.
 * Every call to {@link #iterator()} hands back the same wrapped iterator, so the records can
 * be traversed only once.
 */
public class ADSIterable implements Iterable<String> {

	/** The wrapped record iterator; may be null if none was supplied. */
	private Iterator<String> recordIterator;

	/**
	 * Instantiates a new ADS iterable.
	 *
	 * @param recordIterator
	 *            the iterator to expose; null is treated as "no records" by {@link #iterator()}
	 */
	public ADSIterable(Iterator<String> recordIterator) {
		this.recordIterator = recordIterator;
	}

	/**
	 * {@inheritDoc}
	 *
	 * Returns the wrapped iterator itself (not a fresh one), or an empty iterator when no
	 * iterator has been set, so that for-each loops never receive null.
	 *
	 * @see java.lang.Iterable#iterator()
	 */
	@Override
	public Iterator<String> iterator() {
		if (recordIterator == null) {
			return java.util.Collections.<String>emptyIterator();
		}
		return recordIterator;
	}

	/**
	 * Gets the wrapped record iterator exactly as stored.
	 *
	 * @return the wrapped iterator, or null if none was set
	 */
	public Iterator<String> getRecordIterator() {
		return recordIterator;
	}

	/**
	 * Replaces the wrapped record iterator.
	 *
	 * @param recordIterator
	 *            the new iterator to expose
	 */
	public void setRecordIterator(Iterator<String> recordIterator) {
		this.recordIterator = recordIterator;
	}
}
|
Loading…
Reference in New Issue