Minor Update

git-svn-id: https://svn.d4science.research-infrastructures.eu/gcube/trunk/portlets/user/tabular-data-gwt-service@98958 82a268e6-3cf1-43bd-a215-b396298e98cf
This commit is contained in:
Giancarlo Panichi 2014-07-24 17:14:07 +00:00
parent 78a763998a
commit c2d1cccc8e
1 changed files with 130 additions and 96 deletions

View File

@ -32,173 +32,208 @@ import org.slf4j.LoggerFactory;
/** /**
* *
* @author "Giancarlo Panichi" * @author "Giancarlo Panichi" <a
* <a href="mailto:g.panichi@isti.cnr.it">g.panichi@isti.cnr.it</a> * href="mailto:g.panichi@isti.cnr.it">g.panichi@isti.cnr.it</a>
* *
*/ */
public class CSVFileUtil { public class CSVFileUtil {
/** Upper bound on the number of rows examined by the row-by-row CSV check. */
private static final int MAXROWCHECK = 50000;

/** Logger shared by the static helpers of this class. */
protected static Logger logger = LoggerFactory.getLogger(CSVFileUtil.class);
public static ArrayList<String> getHeader(File csvFile, CSVParserConfiguration parserConfiguration) throws ParseException, IOException public static ArrayList<String> getHeader(File csvFile,
{ CSVParserConfiguration parserConfiguration) throws ParseException,
IOException {
CSVReader csvReader = createCSVReader(csvFile, parserConfiguration); CSVReader csvReader = createCSVReader(csvFile, parserConfiguration);
switch (parserConfiguration.getHeaderPresence()) { switch (parserConfiguration.getHeaderPresence()) {
case FIRST_LINE_COMMENTED_INCLUDED: { case FIRST_LINE_COMMENTED_INCLUDED: {
List<String> firstLine = getFirstLine(csvReader, true); List<String> firstLine = getFirstLine(csvReader, true);
return new ArrayList<String>(firstLine); return new ArrayList<String>(firstLine);
} }
case FIRST_LINE: { case FIRST_LINE: {
List<String> firstLine = getFirstLine(csvReader, false); List<String> firstLine = getFirstLine(csvReader, false);
return new ArrayList<String>(firstLine); return new ArrayList<String>(firstLine);
} }
case NONE: break; case NONE:
break;
} }
int fieldCount = (int) csvReader.countFields(); int fieldCount = (int) csvReader.countFields();
ArrayList<String> fakeHeaders = new ArrayList<String>(); ArrayList<String> fakeHeaders = new ArrayList<String>();
for (int i = 0; i<fieldCount; i++) fakeHeaders.add("Field "+i); for (int i = 0; i < fieldCount; i++)
fakeHeaders.add("Field " + i);
return fakeHeaders; return fakeHeaders;
} }
public static List<String> getFirstLine(CSVReader csvReader, boolean includeComment) throws ParseException, IOException public static List<String> getFirstLine(CSVReader csvReader,
{ boolean includeComment) throws ParseException, IOException {
logger.trace("getFirstLine includeComment: "+includeComment); logger.trace("getFirstLine includeComment: " + includeComment);
List<String> header = csvReader.readLine(includeComment); List<String> header = csvReader.readLine(includeComment);
return header==null?Collections.<String>emptyList():header; return header == null ? Collections.<String> emptyList() : header;
} }
protected static CSVReader createCSVReader(File csvFile, CSVParserConfiguration parserConfiguration) throws FileNotFoundException protected static CSVReader createCSVReader(File csvFile,
{ CSVParserConfiguration parserConfiguration)
logger.trace("createCSVReader csvFile: "+csvFile+" parserConfiguration: "+parserConfiguration); throws FileNotFoundException {
Reader fileReader = new InputStreamReader(new FileInputStream(csvFile), parserConfiguration.getCharset()); logger.trace("createCSVReader csvFile: " + csvFile
CSVReader csvReader = new CSVReader(fileReader, parserConfiguration.getDelimiter(), parserConfiguration.getComment()); + " parserConfiguration: " + parserConfiguration);
Reader fileReader = new InputStreamReader(new FileInputStream(csvFile),
parserConfiguration.getCharset());
CSVReader csvReader = new CSVReader(fileReader,
parserConfiguration.getDelimiter(),
parserConfiguration.getComment());
return csvReader; return csvReader;
} }
public static File skipError(File inCSVFile, CSVParserConfiguration config)
public static File skipError(File inCSVFile, CSVParserConfiguration config) throws ParseException, IOException throws ParseException, IOException {
{ return skipError(inCSVFile, config.getCharset(), config.getDelimiter(),
return skipError(inCSVFile, config.getCharset(), config.getDelimiter(), config.getComment()); config.getComment());
} }
public static File skipError(File inCSVFile, Charset charset,
public static File skipError(File inCSVFile, Charset charset, char delimiter, char comment) throws ParseException, IOException char delimiter, char comment) throws ParseException, IOException {
{
File outCSVFile = File.createTempFile("import", "csv"); File outCSVFile = File.createTempFile("import", "csv");
outCSVFile.deleteOnExit(); outCSVFile.deleteOnExit();
BufferedReader fileReader = new BufferedReader(new InputStreamReader(new FileInputStream(inCSVFile), charset)); BufferedReader fileReader = new BufferedReader(new InputStreamReader(
BufferedWriter fileWriter = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(outCSVFile), charset)); new FileInputStream(inCSVFile), charset));
CSVWriter csvWriter = new CSVWriter(fileWriter,delimiter,comment); BufferedWriter fileWriter = new BufferedWriter(new OutputStreamWriter(
new FileOutputStream(outCSVFile), charset));
CSVWriter csvWriter = new CSVWriter(fileWriter, delimiter, comment);
CSVReader csvReader = new CSVReader(fileReader, delimiter, comment); CSVReader csvReader = new CSVReader(fileReader, delimiter, comment);
List<String> line=null; List<String> line = null;
do{ do {
try{ try {
line= csvReader.readLine(); line = csvReader.readLine();
}catch(ParseException exception) } catch (ParseException exception) {
{ logger.debug("Skip line:" + line.toString());
logger.debug("Skip line:"+line.toString());
continue; continue;
} }
csvWriter.writeLine(line); csvWriter.writeLine(line);
} while (line!=null); } while (line != null);
csvReader.close(); csvReader.close();
csvWriter.close(); csvWriter.close();
return outCSVFile; return outCSVFile;
} }
public static ArrayList<CSVRowError> checkCSV(File csvFile, CSVParserConfiguration config, long errorsLimit) throws ParseException, IOException public static ArrayList<CSVRowError> checkCSV(File csvFile,
{ CSVParserConfiguration config, long errorsLimit)
return checkCSV(csvFile, config.getCharset(), config.getDelimiter(), config.getComment(), errorsLimit); throws ParseException, IOException {
return checkCSV(csvFile, config.getCharset(), config.getDelimiter(),
config.getComment(), errorsLimit);
} }
public static ArrayList<CSVRowError> checkCSV(File csvFile, Charset charset, char delimiter, char comment, long errorsLimit) throws IOException public static ArrayList<CSVRowError> checkCSV(File csvFile,
{ Charset charset, char delimiter, char comment, long errorsLimit)
logger.trace("checkCSV charset: "+charset+" delimiter: "+delimiter+" comment: "+comment); throws IOException {
logger.trace("checkCSV charset: " + charset + " delimiter: "
+ delimiter + " comment: " + comment);
ArrayList<CSVRowError> errors = new ArrayList<CSVRowError>(); ArrayList<CSVRowError> errors = new ArrayList<CSVRowError>();
Reader fileReader = new InputStreamReader(new FileInputStream(csvFile), charset); Reader fileReader = new InputStreamReader(new FileInputStream(csvFile),
charset);
CSVReader csvReader = new CSVReader(fileReader, delimiter, comment); CSVReader csvReader = new CSVReader(fileReader, delimiter, comment);
long count = -1; long count = -1;
long fields = -1; long fields = -1;
int maxRowCheck = 0;
do{ do {
try{ try {
count = csvReader.countFields(); count = csvReader.countFields();
}catch(ParseException exception) } catch (ParseException exception) {
{
StringBuilder errorMessage = new StringBuilder(); StringBuilder errorMessage = new StringBuilder();
errorMessage.append("Error parsing the file "); errorMessage.append("Error parsing the file ");
errorMessage.append(exception.getMessage()); errorMessage.append(exception.getMessage());
CSVRowError error = new CSVRowError(csvReader.getLineNumber(), csvReader.getCurrentLine(), errorMessage.toString()); CSVRowError error = new CSVRowError(csvReader.getLineNumber(),
csvReader.getCurrentLine(), errorMessage.toString());
errors.add(error); errors.add(error);
logger.trace(error.getErrorDescription()); logger.trace(error.getErrorDescription());
continue; continue;
} }
if (count>=0){ if (count >= 0) {
if (fields<0 ) fields = count; if (fields < 0)
fields = count;
else if (fields != count) { else if (fields != count) {
StringBuilder errorMessage = new StringBuilder(); StringBuilder errorMessage = new StringBuilder();
errorMessage.append("Expected "); errorMessage.append("Expected ");
errorMessage.append(fields); errorMessage.append(fields);
errorMessage.append(" fields, found "); errorMessage.append(" fields, found ");
errorMessage.append(count); errorMessage.append(count);
errorMessage.append(" fields. Check the format of your input file."); errorMessage
CSVRowError error = new CSVRowError(csvReader.getLineNumber(), csvReader.getCurrentLine(), errorMessage.toString()); .append(" fields. Check the format of your input file.");
CSVRowError error = new CSVRowError(
csvReader.getLineNumber(),
csvReader.getCurrentLine(), errorMessage.toString());
errors.add(error); errors.add(error);
logger.trace(error.getErrorDescription()); logger.trace(error.getErrorDescription());
} }
} }
maxRowCheck++;
}while(count >=0 && errors.size()<errorsLimit); } while (count >= 0 && errors.size() < errorsLimit
&& maxRowCheck < MAXROWCHECK);
return errors; return errors;
} }
public static void toJson(File csvFile, Charset inputCharset, File outputFile, Charset outputCharset, HeaderPresence headerPresence, char delimiter, char comment, long limit) throws ParseException, IOException public static void toJson(File csvFile, Charset inputCharset,
{ File outputFile, Charset outputCharset,
toJson(new FileInputStream(csvFile), inputCharset, new FileOutputStream(outputFile), outputCharset, headerPresence, delimiter, comment, limit); HeaderPresence headerPresence, char delimiter, char comment,
long limit) throws ParseException, IOException {
toJson(new FileInputStream(csvFile), inputCharset,
new FileOutputStream(outputFile), outputCharset,
headerPresence, delimiter, comment, limit);
} }
public static void toJson(InputStream csv, OutputStream output, Charset outputCharset, CSVParserConfiguration config, long limit) throws ParseException, IOException public static void toJson(InputStream csv, OutputStream output,
{ Charset outputCharset, CSVParserConfiguration config, long limit)
toJson(csv, config.getCharset(), output, outputCharset, config.getHeaderPresence(), config.getDelimiter(), config.getComment(), limit); throws ParseException, IOException {
toJson(csv, config.getCharset(), output, outputCharset,
config.getHeaderPresence(), config.getDelimiter(),
config.getComment(), limit);
} }
public static void toJson(InputStream csv, Charset inputCharset, OutputStream output, Charset outputCharset, HeaderPresence headerPresence, char delimiter, char comment, long limit) throws ParseException, IOException public static void toJson(InputStream csv, Charset inputCharset,
{ OutputStream output, Charset outputCharset,
logger.trace("toJson charset: "+inputCharset+" delimiter: "+delimiter+" comment: "+comment); HeaderPresence headerPresence, char delimiter, char comment,
Writer writer = new BufferedWriter(new OutputStreamWriter(output, outputCharset)); long limit) throws ParseException, IOException {
logger.trace("toJson charset: " + inputCharset + " delimiter: "
+ delimiter + " comment: " + comment);
Writer writer = new BufferedWriter(new OutputStreamWriter(output,
outputCharset));
Reader reader = new InputStreamReader(csv, inputCharset); Reader reader = new InputStreamReader(csv, inputCharset);
CSVReader csvReader = new CSVReader(reader, delimiter, comment); CSVReader csvReader = new CSVReader(reader, delimiter, comment);
String jsonLine; String jsonLine;
long count = 0; long count = 0;
if (headerPresence!=HeaderPresence.NONE) { if (headerPresence != HeaderPresence.NONE) {
csvReader.readLine(headerPresence==HeaderPresence.FIRST_LINE_COMMENTED_INCLUDED); csvReader
.readLine(headerPresence == HeaderPresence.FIRST_LINE_COMMENTED_INCLUDED);
} }
writer.write("{\"records\":["); writer.write("{\"records\":[");
while((jsonLine = csvReader.readJSonLine())!=null){ while ((jsonLine = csvReader.readJSonLine()) != null) {
if (count>0) writer.write(","); if (count > 0)
writer.write(",");
writer.write(jsonLine); writer.write(jsonLine);
count++; count++;
if (count>limit) break; if (count > limit)
break;
} }
writer.write("]}"); writer.write("]}");
@ -208,8 +243,7 @@ public class CSVFileUtil {
} }
public static String guessEncoding(File file) throws IOException public static String guessEncoding(File file) throws IOException {
{
FileInputStream fis = new FileInputStream(file); FileInputStream fis = new FileInputStream(file);
UniversalDetector detector = new UniversalDetector(null); UniversalDetector detector = new UniversalDetector(null);
byte[] buf = new byte[4096]; byte[] buf = new byte[4096];