Fix reading files from HDFS in FileCollector & FileGZipCollector plugins
parent
81c4496d32
commit
623f7be26d
@ -1,25 +1,31 @@
|
||||
package eu.dnetlib.dhp.collection.plugin.file;
|
||||
|
||||
import eu.dnetlib.dhp.common.collection.CollectorException;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.io.BufferedInputStream;
|
||||
import java.io.FileInputStream;
|
||||
|
||||
public class FileCollectorPlugin extends AbstractSplittedRecordPlugin {
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(FileCollectorPlugin.class);
|
||||
|
||||
public FileCollectorPlugin(FileSystem fileSystem) {
|
||||
super(fileSystem);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected BufferedInputStream getBufferedInputStream(final String baseUrl) throws CollectorException {
|
||||
protected BufferedInputStream getBufferedInputStream(final Path filePath) throws CollectorException {
|
||||
|
||||
log.info("baseUrl: {}", baseUrl);
|
||||
log.info("filePath: {}", filePath);
|
||||
|
||||
try {
|
||||
return new BufferedInputStream(new FileInputStream(baseUrl));
|
||||
FileSystem fs = super.getFileSystem();
|
||||
return new BufferedInputStream(fs.open(filePath));
|
||||
} catch (Exception e) {
|
||||
throw new CollectorException("Error reading file " + baseUrl, e);
|
||||
throw new CollectorException("Error reading file " + filePath, e);
|
||||
}
|
||||
}
|
||||
}
|
@ -1,28 +1,33 @@
|
||||
package eu.dnetlib.dhp.collection.plugin.file;
|
||||
|
||||
import eu.dnetlib.dhp.common.collection.CollectorException;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.io.*;
|
||||
import java.net.MalformedURLException;
|
||||
import java.net.URL;
|
||||
import java.io.BufferedInputStream;
|
||||
import java.util.zip.GZIPInputStream;
|
||||
|
||||
public class FileGZipCollectorPlugin extends AbstractSplittedRecordPlugin {
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(FileGZipCollectorPlugin.class);
|
||||
|
||||
public FileGZipCollectorPlugin(FileSystem fileSystem) {
|
||||
super(fileSystem);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected BufferedInputStream getBufferedInputStream(String baseUrl) throws CollectorException {
|
||||
protected BufferedInputStream getBufferedInputStream(final Path filePath) throws CollectorException {
|
||||
|
||||
log.info("baseUrl: {}", baseUrl);
|
||||
log.info("filePath: {}", filePath);
|
||||
|
||||
try {
|
||||
GZIPInputStream stream = new GZIPInputStream(new FileInputStream(baseUrl));
|
||||
FileSystem fs = super.getFileSystem();
|
||||
GZIPInputStream stream = new GZIPInputStream(fs.open(filePath));
|
||||
return new BufferedInputStream(stream);
|
||||
} catch (Exception e) {
|
||||
throw new CollectorException("Error reading file " + baseUrl, e);
|
||||
throw new CollectorException("Error reading file " + filePath, e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue