dnet-docker/dnet-app/libs/dnet-wf-executor-common/src/main/java/eu/dnetlib/wfs/collector/filesystem/FilesystemCollectorPlugin.java

50 lines
1.7 KiB
Java

package eu.dnetlib.wfs.collector.filesystem;
import java.io.File;
import java.io.FileInputStream;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.time.LocalDateTime;
import java.util.Map;
import java.util.stream.Stream;
import org.apache.commons.io.IOUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import eu.dnetlib.errors.DnetException;
import eu.dnetlib.utils.DnetStreamSupport;
import eu.dnetlib.utils.XmlCleaner;
import eu.dnetlib.wfs.annotations.CollectorPlugin;
import eu.dnetlib.wfs.collector.DnetCollectorPlugin;
/**
 * Collector plugin registered under the name {@code "filesystem"}.
 *
 * <p>It resolves the path component of the given base URL to a local directory,
 * iterates over it with a {@link FileSystemIterator} and emits the (cleaned)
 * textual content of every entry the iterator returns.
 */
@CollectorPlugin("filesystem")
public class FilesystemCollectorPlugin implements DnetCollectorPlugin {

	private static final Log log = LogFactory.getLog(FilesystemCollectorPlugin.class);

	/**
	 * Builds a stream with the content of the files found under the base path.
	 *
	 * @param baseUrl
	 *            URL whose path component is used as the directory to scan
	 * @param apiParams
	 *            plugin parameters; the optional {@code "extensions"} entry is handed to the
	 *            {@link FileSystemIterator} and defaults to {@code "xml"}
	 *            (NOTE(review): presumably a file-extension filter - confirm against FileSystemIterator)
	 * @param from
	 *            not used by this plugin
	 * @param until
	 *            not used by this plugin
	 * @return a stream with one string per entry returned by the iterator; an entry that cannot
	 *         be read is emitted as an empty string
	 * @throws Exception
	 *             if {@code baseUrl} is not a valid URL, or a {@link DnetException} if the
	 *             resolved path does not exist
	 */
	@Override
	public Stream<String> collect(final String baseUrl, final Map<String, String> apiParams, final LocalDateTime from, final LocalDateTime until)
			throws Exception {

		final URL basePath = new URL(baseUrl);
		final File baseDir = new File(basePath.getPath());

		if (!baseDir.exists()) {
			throw new DnetException(String.format("The base URL %s, does not exist", basePath.getPath()));
		}

		final FileSystemIterator fsi = new FileSystemIterator(baseDir.getAbsolutePath(), apiParams.getOrDefault("extensions", "xml"));

		return DnetStreamSupport.stream(fsi).map(this::loadFile);
	}

	/**
	 * Reads a file as UTF-8 text, drops a leading byte-order mark if present and passes the
	 * result through {@link XmlCleaner#cleanAllEntities}.
	 *
	 * <p>Failures are deliberately not propagated, so that one unreadable file does not abort
	 * the whole stream: the error is logged together with its cause and an empty string is
	 * returned instead.
	 *
	 * @param inputFileName
	 *            path of the file to read
	 * @return the cleaned file content, or {@code ""} if the file could not be processed
	 */
	private String loadFile(final String inputFileName) {
		try (FileInputStream fileInputStream = new FileInputStream(inputFileName)) {
			final String s = IOUtils.toString(fileInputStream, StandardCharsets.UTF_8);
			// A UTF-8 BOM is decoded as U+FEFF; it is not part of the document content.
			return XmlCleaner.cleanAllEntities(s.startsWith("\uFEFF") ? s.substring(1) : s);
		} catch (final Exception e) {
			// Pass the exception to the logger so the cause and stack trace are not lost.
			log.error("Unable to read " + inputFileName, e);
			return "";
		}
	}
}