package eu.dnetlib.wfs.collector.filesystem;

import java.io.File;
import java.io.FileInputStream;
import java.net.URISyntaxException;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.time.LocalDateTime;
import java.util.Map;
import java.util.stream.Stream;

import org.apache.commons.io.IOUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import eu.dnetlib.errors.DnetException;
import eu.dnetlib.utils.DnetStreamSupport;
import eu.dnetlib.utils.XmlCleaner;
import eu.dnetlib.wfs.annotations.CollectorPlugin;
import eu.dnetlib.wfs.collector.DnetCollectorPlugin;

/**
 * Collector plugin registered under the name {@code "filesystem"}.
 *
 * <p>
 * It resolves a base URL to a local directory, hands that directory to a {@code FileSystemIterator} and maps every file name produced by the
 * iterator to the (cleaned) textual content of that file.
 * </p>
 */
@CollectorPlugin("filesystem")
public class FilesystemCollectorPlugin implements DnetCollectorPlugin {

	private static final Log log = LogFactory.getLog(FilesystemCollectorPlugin.class);

	/**
	 * Builds a stream with the content of the files found under the directory addressed by {@code baseUrl}.
	 *
	 * @param baseUrl
	 *            URL whose path component addresses the base directory; it must be parseable by {@link URL#URL(String)}
	 * @param apiParams
	 *            collection parameters; the {@code "extensions"} entry is forwarded to the {@code FileSystemIterator} and defaults to
	 *            {@code "xml"} when absent. NOTE(review): the signature is raw here, presumably a map of strings - confirm against
	 *            {@code DnetCollectorPlugin}
	 * @param from
	 *            not used by this plugin
	 * @param until
	 *            not used by this plugin
	 * @return a stream obtained from {@code DnetStreamSupport.stream(...)} whose elements are the results of {@link #loadFile(String)}
	 * @throws DnetException
	 *             if the resolved base directory does not exist
	 * @throws Exception
	 *             if {@code baseUrl} is not a valid URL, or for any failure raised while setting up the iterator
	 */
	@Override
	public Stream collect(final String baseUrl, final Map apiParams, final LocalDateTime from, final LocalDateTime until) throws Exception {
		final URL basePath = new URL(baseUrl);
		final File baseDir = resolveBaseDir(basePath);

		if (!baseDir.exists()) {
			throw new DnetException(String.format("The base URL %s, does not exist", basePath.getPath()));
		}

		final FileSystemIterator fsi = new FileSystemIterator(baseDir.getAbsolutePath(), apiParams.getOrDefault("extensions", "xml"));

		return DnetStreamSupport.stream(fsi).map(this::loadFile);
	}

	/**
	 * Maps the path component of a URL to a local directory.
	 *
	 * <p>
	 * {@link URL#getPath()} returns the path exactly as written, so percent-escapes (for instance {@code %20}) are not decoded. The raw path is
	 * tried first, which keeps the previous behaviour for every URL that already worked; only when it does not exist the percent-decoded path
	 * is tried as well.
	 * </p>
	 *
	 * @param basePath
	 *            the parsed base URL
	 * @return the existing directory if one of the two candidates exists, otherwise the file built from the raw path
	 */
	private static File resolveBaseDir(final URL basePath) {
		final File rawDir = new File(basePath.getPath());
		if (rawDir.exists()) { return rawDir; }

		try {
			// URI#getPath() decodes percent-escapes; it is null for opaque URIs
			final String decodedPath = basePath.toURI().getPath();
			if (decodedPath != null) {
				final File decodedDir = new File(decodedPath);
				if (decodedDir.exists()) { return decodedDir; }
			}
		} catch (final URISyntaxException ignored) {
			// The URL is not a strictly valid URI (e.g. it contains unescaped spaces): only the raw path applies.
		}

		return rawDir;
	}

	/**
	 * Reads a file as UTF-8 text, drops a leading byte-order mark (U+FEFF) if present and passes the text through
	 * {@code XmlCleaner.cleanAllEntities}.
	 *
	 * <p>
	 * This is a best-effort operation: any failure is logged (with its cause) and an empty string is returned, so a single unreadable file
	 * does not interrupt the stream.
	 * </p>
	 *
	 * @param inputFileName
	 *            path of the file to read
	 * @return the cleaned content of the file, or an empty string if the file could not be read or cleaned
	 */
	private String loadFile(final String inputFileName) {
		try (FileInputStream fileInputStream = new FileInputStream(inputFileName)) {
			final String s = IOUtils.toString(fileInputStream, StandardCharsets.UTF_8);
			return XmlCleaner.cleanAllEntities(s.startsWith("\uFEFF") ? s.substring(1) : s);
		} catch (final Exception e) {
			// Keep the cause in the log: without it the reason of the failure (missing file, permissions, ...) is lost.
			log.error("Unable to read " + inputFileName, e);
			return "";
		}
	}
}