53 lines
1.5 KiB
Java
53 lines
1.5 KiB
Java
|
|
package eu.dnetlib.dhp.actionmanager.project.utils;
|
|
|
|
import java.io.*;
|
|
import java.util.Optional;
|
|
|
|
import org.apache.commons.io.IOUtils;
|
|
import org.apache.hadoop.conf.Configuration;
|
|
import org.apache.hadoop.fs.FileSystem;
|
|
|
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
|
import eu.dnetlib.dhp.common.collection.GetCSV;
|
|
import eu.dnetlib.dhp.common.collection.HttpConnector2;
|
|
|
|
/**
|
|
* Applies the parsing of a csv file and writes the Serialization of it in hdfs
|
|
*/
|
|
public class ReadCSV {
|
|
|
|
public static void main(final String[] args) throws Exception {
|
|
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
|
IOUtils
|
|
.toString(
|
|
ReadCSV.class
|
|
.getResourceAsStream(
|
|
"/eu/dnetlib/dhp/actionmanager/project/parameters.json")));
|
|
|
|
parser.parseArgument(args);
|
|
|
|
final String fileURL = parser.get("fileURL");
|
|
final String hdfsPath = parser.get("hdfsPath");
|
|
final String hdfsNameNode = parser.get("hdfsNameNode");
|
|
final String classForName = parser.get("classForName");
|
|
Optional<String> delimiter = Optional.ofNullable(parser.get("delimiter"));
|
|
char del = ';';
|
|
if (delimiter.isPresent())
|
|
del = delimiter.get().charAt(0);
|
|
|
|
Configuration conf = new Configuration();
|
|
conf.set("fs.defaultFS", hdfsNameNode);
|
|
|
|
FileSystem fileSystem = FileSystem.get(conf);
|
|
BufferedReader reader = new BufferedReader(
|
|
new InputStreamReader(new HttpConnector2().getInputSourceAsStream(fileURL)));
|
|
|
|
GetCSV.getCsv(fileSystem, reader, hdfsPath, classForName, del);
|
|
|
|
reader.close();
|
|
|
|
}
|
|
|
|
}
|