diff --git a/src/main/java/org/gcube/portlets/user/td/gwtservice/shared/csv/CSVFileUtil.java b/src/main/java/org/gcube/portlets/user/td/gwtservice/shared/csv/CSVFileUtil.java index a3be450..285e8b6 100644 --- a/src/main/java/org/gcube/portlets/user/td/gwtservice/shared/csv/CSVFileUtil.java +++ b/src/main/java/org/gcube/portlets/user/td/gwtservice/shared/csv/CSVFileUtil.java @@ -32,173 +32,208 @@ import org.slf4j.LoggerFactory; /** * - * @author "Giancarlo Panichi" - * g.panichi@isti.cnr.it - * + * @author "Giancarlo Panichi" g.panichi@isti.cnr.it + * */ public class CSVFileUtil { - + + private static final int MAXROWCHECK = 50000; protected static Logger logger = LoggerFactory.getLogger(CSVFileUtil.class); - - public static ArrayList getHeader(File csvFile, CSVParserConfiguration parserConfiguration) throws ParseException, IOException - { - + + public static ArrayList getHeader(File csvFile, + CSVParserConfiguration parserConfiguration) throws ParseException, + IOException { + CSVReader csvReader = createCSVReader(csvFile, parserConfiguration); switch (parserConfiguration.getHeaderPresence()) { - case FIRST_LINE_COMMENTED_INCLUDED: { - List firstLine = getFirstLine(csvReader, true); - return new ArrayList(firstLine); - } - case FIRST_LINE: { - List firstLine = getFirstLine(csvReader, false); - return new ArrayList(firstLine); - } - case NONE: break; + case FIRST_LINE_COMMENTED_INCLUDED: { + List firstLine = getFirstLine(csvReader, true); + return new ArrayList(firstLine); } - + case FIRST_LINE: { + List firstLine = getFirstLine(csvReader, false); + return new ArrayList(firstLine); + } + case NONE: + break; + } + int fieldCount = (int) csvReader.countFields(); ArrayList fakeHeaders = new ArrayList(); - for (int i = 0; i getFirstLine(CSVReader csvReader, boolean includeComment) throws ParseException, IOException - { - logger.trace("getFirstLine includeComment: "+includeComment); + public static List getFirstLine(CSVReader csvReader, + boolean includeComment) throws ParseException, IOException { + logger.trace("getFirstLine includeComment: " + includeComment); List header = csvReader.readLine(includeComment); - return header==null?Collections.emptyList():header; + return header == null ? Collections. emptyList() : header; } - - protected static CSVReader createCSVReader(File csvFile, CSVParserConfiguration parserConfiguration) throws FileNotFoundException - { - logger.trace("createCSVReader csvFile: "+csvFile+" parserConfiguration: "+parserConfiguration); - Reader fileReader = new InputStreamReader(new FileInputStream(csvFile), parserConfiguration.getCharset()); - CSVReader csvReader = new CSVReader(fileReader, parserConfiguration.getDelimiter(), parserConfiguration.getComment()); + + protected static CSVReader createCSVReader(File csvFile, + CSVParserConfiguration parserConfiguration) + throws FileNotFoundException { + logger.trace("createCSVReader csvFile: " + csvFile + + " parserConfiguration: " + parserConfiguration); + Reader fileReader = new InputStreamReader(new FileInputStream(csvFile), + parserConfiguration.getCharset()); + CSVReader csvReader = new CSVReader(fileReader, + parserConfiguration.getDelimiter(), + parserConfiguration.getComment()); return csvReader; } - - - public static File skipError(File inCSVFile, CSVParserConfiguration config) throws ParseException, IOException - { - return skipError(inCSVFile, config.getCharset(), config.getDelimiter(), config.getComment()); + + public static File skipError(File inCSVFile, CSVParserConfiguration config) + throws ParseException, IOException { + return skipError(inCSVFile, config.getCharset(), config.getDelimiter(), + config.getComment()); } - - - public static File skipError(File inCSVFile, Charset charset, char delimiter, char comment) throws ParseException, IOException - { + + public static File skipError(File inCSVFile, Charset charset, + char delimiter, char comment) throws ParseException, IOException { File outCSVFile = File.createTempFile("import", "csv"); outCSVFile.deleteOnExit(); - - BufferedReader fileReader = new BufferedReader(new InputStreamReader(new FileInputStream(inCSVFile), charset)); - BufferedWriter fileWriter = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(outCSVFile), charset)); - CSVWriter csvWriter = new CSVWriter(fileWriter,delimiter,comment); + + BufferedReader fileReader = new BufferedReader(new InputStreamReader( + new FileInputStream(inCSVFile), charset)); + BufferedWriter fileWriter = new BufferedWriter(new OutputStreamWriter( + new FileOutputStream(outCSVFile), charset)); + CSVWriter csvWriter = new CSVWriter(fileWriter, delimiter, comment); CSVReader csvReader = new CSVReader(fileReader, delimiter, comment); - - List line=null; - - do{ - try{ - line= csvReader.readLine(); - - }catch(ParseException exception) - { - logger.debug("Skip line:"+line.toString()); + + List line = null; + + do { + try { + line = csvReader.readLine(); + + } catch (ParseException exception) { + logger.debug("Skip line:" + line.toString()); continue; } csvWriter.writeLine(line); - - } while (line!=null); - + + } while (line != null); + csvReader.close(); csvWriter.close(); return outCSVFile; - } - - public static ArrayList checkCSV(File csvFile, CSVParserConfiguration config, long errorsLimit) throws ParseException, IOException - { - return checkCSV(csvFile, config.getCharset(), config.getDelimiter(), config.getComment(), errorsLimit); } - public static ArrayList checkCSV(File csvFile, Charset charset, char delimiter, char comment, long errorsLimit) throws IOException - { - logger.trace("checkCSV charset: "+charset+" delimiter: "+delimiter+" comment: "+comment); + public static ArrayList checkCSV(File csvFile, + CSVParserConfiguration config, long errorsLimit) + throws ParseException, IOException { + return checkCSV(csvFile, config.getCharset(), config.getDelimiter(), + config.getComment(), errorsLimit); + } + + public static ArrayList checkCSV(File csvFile, + Charset charset, char delimiter, char comment, long errorsLimit) + throws IOException { + logger.trace("checkCSV charset: " + charset + " delimiter: " + + delimiter + " comment: " + comment); ArrayList errors = new ArrayList(); - Reader fileReader = new InputStreamReader(new FileInputStream(csvFile), charset); + Reader fileReader = new InputStreamReader(new FileInputStream(csvFile), + charset); CSVReader csvReader = new CSVReader(fileReader, delimiter, comment); - + long count = -1; long fields = -1; + int maxRowCheck = 0; - do{ - try{ + do { + try { count = csvReader.countFields(); - }catch(ParseException exception) - { + } catch (ParseException exception) { StringBuilder errorMessage = new StringBuilder(); errorMessage.append("Error parsing the file "); errorMessage.append(exception.getMessage()); - - CSVRowError error = new CSVRowError(csvReader.getLineNumber(), csvReader.getCurrentLine(), errorMessage.toString()); + + CSVRowError error = new CSVRowError(csvReader.getLineNumber(), + csvReader.getCurrentLine(), errorMessage.toString()); errors.add(error); logger.trace(error.getErrorDescription()); continue; } - if (count>=0){ - if (fields<0 ) fields = count; + if (count >= 0) { + if (fields < 0) + fields = count; else if (fields != count) { StringBuilder errorMessage = new StringBuilder(); errorMessage.append("Expected "); errorMessage.append(fields); errorMessage.append(" fields, found "); errorMessage.append(count); - errorMessage.append(" fields. Check the format of your input file."); - CSVRowError error = new CSVRowError(csvReader.getLineNumber(), csvReader.getCurrentLine(), errorMessage.toString()); + errorMessage + .append(" fields. Check the format of your input file."); + CSVRowError error = new CSVRowError( + csvReader.getLineNumber(), + csvReader.getCurrentLine(), errorMessage.toString()); errors.add(error); logger.trace(error.getErrorDescription()); } } + maxRowCheck++; - }while(count >=0 && errors.size()= 0 && errors.size() < errorsLimit + && maxRowCheck < MAXROWCHECK); return errors; } - public static void toJson(File csvFile, Charset inputCharset, File outputFile, Charset outputCharset, HeaderPresence headerPresence, char delimiter, char comment, long limit) throws ParseException, IOException - { - toJson(new FileInputStream(csvFile), inputCharset, new FileOutputStream(outputFile), outputCharset, headerPresence, delimiter, comment, limit); + public static void toJson(File csvFile, Charset inputCharset, + File outputFile, Charset outputCharset, + HeaderPresence headerPresence, char delimiter, char comment, + long limit) throws ParseException, IOException { + toJson(new FileInputStream(csvFile), inputCharset, + new FileOutputStream(outputFile), outputCharset, + headerPresence, delimiter, comment, limit); } - public static void toJson(InputStream csv, OutputStream output, Charset outputCharset, CSVParserConfiguration config, long limit) throws ParseException, IOException - { - toJson(csv, config.getCharset(), output, outputCharset, config.getHeaderPresence(), config.getDelimiter(), config.getComment(), limit); + public static void toJson(InputStream csv, OutputStream output, + Charset outputCharset, CSVParserConfiguration config, long limit) + throws ParseException, IOException { + toJson(csv, config.getCharset(), output, outputCharset, + config.getHeaderPresence(), config.getDelimiter(), + config.getComment(), limit); } - public static void toJson(InputStream csv, Charset inputCharset, OutputStream output, Charset outputCharset, HeaderPresence headerPresence, char delimiter, char comment, long limit) throws ParseException, IOException - { - logger.trace("toJson charset: "+inputCharset+" delimiter: "+delimiter+" comment: "+comment); - Writer writer = new BufferedWriter(new OutputStreamWriter(output, outputCharset)); + public static void toJson(InputStream csv, Charset inputCharset, + OutputStream output, Charset outputCharset, + HeaderPresence headerPresence, char delimiter, char comment, + long limit) throws ParseException, IOException { + logger.trace("toJson charset: " + inputCharset + " delimiter: " + + delimiter + " comment: " + comment); + Writer writer = new BufferedWriter(new OutputStreamWriter(output, + outputCharset)); Reader reader = new InputStreamReader(csv, inputCharset); CSVReader csvReader = new CSVReader(reader, delimiter, comment); String jsonLine; long count = 0; - if (headerPresence!=HeaderPresence.NONE) { - csvReader.readLine(headerPresence==HeaderPresence.FIRST_LINE_COMMENTED_INCLUDED); + if (headerPresence != HeaderPresence.NONE) { + csvReader + .readLine(headerPresence == HeaderPresence.FIRST_LINE_COMMENTED_INCLUDED); } writer.write("{\"records\":["); - while((jsonLine = csvReader.readJSonLine())!=null){ + while ((jsonLine = csvReader.readJSonLine()) != null) { - if (count>0) writer.write(","); + if (count > 0) + writer.write(","); writer.write(jsonLine); count++; - if (count>limit) break; + if (count > limit) + break; } writer.write("]}"); @@ -207,9 +242,8 @@ public class CSVFileUtil { writer.close(); } - - public static String guessEncoding(File file) throws IOException - { + + public static String guessEncoding(File file) throws IOException { FileInputStream fis = new FileInputStream(file); UniversalDetector detector = new UniversalDetector(null); byte[] buf = new byte[4096]; @@ -223,7 +257,7 @@ public class CSVFileUtil { String encoding = detector.getDetectedCharset(); detector.reset(); fis.close(); - + return encoding; }