blob: 582652f18169a1fc42e44bfd3edc0ccd4e1aaf3b [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.csv;
import static org.apache.commons.csv.Constants.CR;
import static org.apache.commons.csv.Constants.CRLF;
import static org.apache.commons.csv.Constants.LF;
import static org.junit.jupiter.api.Assertions.assertArrayEquals;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertNull;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;
import java.io.File;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PipedReader;
import java.io.PipedWriter;
import java.io.Reader;
import java.io.StringReader;
import java.io.StringWriter;
import java.net.URL;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import org.apache.commons.io.input.BOMInputStream;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
/**
* CSVParserTest
*
* The tests are organized in three different sections: The 'setter/getter' section, the lexer section and finally the
* parser section. In case a test fails, you should follow a top-down approach for fixing a potential bug (it's likely
* that the parser itself fails if the lexer has problems...).
*/
public class CSVParserTest {
// Charset handed to the parser factories that accept a Charset.
private static final Charset UTF_8 = StandardCharsets.UTF_8;
// Name of UTF_8, for APIs that take a charset name rather than a Charset instance.
private static final String UTF_8_NAME = UTF_8.name();
// Multi-record input; parsed with withIgnoreSurroundingSpaces() it is expected to yield RESULT
// (see testGetLine and testGetRecords).
private static final String CSV_INPUT = "a,b,c,d\n" + " a , b , 1 2 \n" + "\"foo baar\", b,\n"
// + " \"foo\n,,\n\"\",,\n\\\"\",d,e\n";
+ " \"foo\n,,\n\"\",,\n\"\"\",d,e\n"; // changed to use standard CSV escaping
// Single-record input whose expected values are RESULT[0].
private static final String CSV_INPUT_1 = "a,b,c,d";
// Single-record input whose expected values are RESULT[1].
private static final String CSV_INPUT_2 = "a,b,1 2";
// Expected field values, one inner array per record of CSV_INPUT, in record order.
private static final String[][] RESULT = { { "a", "b", "c", "d" }, { "a", "b", "1 2" }, { "foo baar", "b", "" },
{ "foo\n,,\n\",,\n\"", "d", "e" } };
/**
 * Opens a classpath resource and wraps it in a {@link BOMInputStream}.
 *
 * @param resource the resource name, looked up through the system class loader
 * @return a stream over the resource; the caller is responsible for closing it
 * @throws IOException if the resource cannot be found or cannot be opened
 */
private BOMInputStream createBOMInputStream(final String resource) throws IOException {
    final URL url = ClassLoader.getSystemClassLoader().getResource(resource);
    if (url == null) {
        // getResource reports a missing resource by returning null; fail with a readable
        // message instead of a bare NullPointerException on the next line.
        throw new IOException("Test resource not found on the classpath: " + resource);
    }
    return new BOMInputStream(url.openStream());
}
/**
 * Drains the given parser, asserting that every record it produces is non-null.
 *
 * @param parser the parser to iterate to exhaustion
 */
private void parseFully(final CSVParser parser) {
    final Iterator<CSVRecord> remaining = parser.iterator();
    while (remaining.hasNext()) {
        assertNotNull(remaining.next());
    }
}
@Test
public void testBackslashEscaping() throws IOException {
    // A forward slash stands in for the escape character and a single quote for the
    // encapsulator, so the Java literals below need no additional escaping of their own.
    final String input = "one,two,three\n" // 0
        + "'',''\n" // 1) empty encapsulators
        + "/',/'\n" // 2) single encapsulators
        + "'/'','/''\n" // 3) single encapsulators encapsulated via escape
        + "'''',''''\n" // 4) single encapsulators encapsulated via doubling
        + "/,,/,\n" // 5) separator escaped
        + "//,//\n" // 6) escape escaped
        + "'//','//'\n" // 7) escape escaped in encapsulation
        + " 8 , \"quoted \"\" /\" // string\" \n" // don't eat spaces
        + "9, /\n \n"; // escaped newline
    final String[][] expected = {
        { "one", "two", "three" }, // 0
        { "", "" }, // 1
        { "'", "'" }, // 2
        { "'", "'" }, // 3
        { "'", "'" }, // 4
        { ",", "," }, // 5
        { "/", "/" }, // 6
        { "/", "/" }, // 7
        { " 8 ", " \"quoted \"\" /\" / string\" " },
        { "9", " \n " } };
    final CSVFormat format = CSVFormat.newFormat(',')
        .withQuote('\'')
        .withRecordSeparator(CRLF)
        .withEscape('/')
        .withIgnoreEmptyLines();
    try (final CSVParser parser = CSVParser.parse(input, format)) {
        final List<CSVRecord> parsed = parser.getRecords();
        assertFalse(parsed.isEmpty());
        Utils.compare("Records do not match expected result", expected, parsed);
    }
}
@Test
public void testBackslashEscaping2() throws IOException {
    // As in testBackslashEscaping, a forward slash is the escape character so that the
    // Java literals stay readable; no quote character is configured for this format.
    final String input = " , , \n" // 1)
        + " \t , , \n" // 2)
        + " // , /, , /,\n"; // 3)
    final String[][] expected = {
        { " ", " ", " " }, // 1
        { " \t ", " ", " " }, // 2
        { " / ", " , ", " ," } }; // 3
    final CSVFormat format = CSVFormat.newFormat(',')
        .withRecordSeparator(CRLF)
        .withEscape('/')
        .withIgnoreEmptyLines();
    try (final CSVParser parser = CSVParser.parse(input, format)) {
        final List<CSVRecord> parsed = parser.getRecords();
        assertFalse(parsed.isEmpty());
        Utils.compare("", expected, parsed);
    }
}
@Test
@Disabled
public void testBackslashEscapingOld() throws IOException {
    // One input line per expected row below.
    final String input = "one,two,three\n"
        + "on\\\"e,two\n"
        + "on\"e,two\n"
        + "one,\"tw\\\"o\"\n"
        + "one,\"t\\,wo\"\n"
        + "one,two,\"th,ree\"\n"
        + "\"a\\\\\"\n"
        + "a\\,b\n"
        + "\"a\\\\,b\"";
    final String[][] expected = {
        { "one", "two", "three" },
        { "on\\\"e", "two" },
        { "on\"e", "two" },
        { "one", "tw\"o" },
        { "one", "t\\,wo" }, // backslash in quotes only escapes a delimiter (",")
        { "one", "two", "th,ree" },
        { "a\\\\" }, // backslash in quotes only escapes a delimiter (",")
        { "a\\", "b" }, // a backslash must be returned
        { "a\\\\,b" } // backslash in quotes only escapes a delimiter (",")
    };
    try (final CSVParser parser = CSVParser.parse(input, CSVFormat.DEFAULT)) {
        final List<CSVRecord> parsed = parser.getRecords();
        assertEquals(expected.length, parsed.size());
        assertFalse(parsed.isEmpty());
        for (int row = 0; row < expected.length; row++) {
            assertArrayEquals(expected[row], parsed.get(row).values());
        }
    }
}
@Test
@Disabled("CSV-107")
public void testBOM() throws IOException {
    // Parses the BOM-prefixed resource straight from its URL and looks up the "Date" column by name.
    final URL resource = ClassLoader.getSystemClassLoader().getResource("CSVFileParser/bom.csv");
    final Charset charset = Charset.forName(UTF_8_NAME);
    try (final CSVParser parser = CSVParser.parse(resource, charset, CSVFormat.EXCEL.withHeader())) {
        for (final CSVRecord row : parser) {
            assertNotNull(row.get("Date"));
        }
    }
}
@Test
public void testBOMInputStream_ParserWithInputStream() throws IOException {
    // Feeds the BOM-wrapped stream to the InputStream-based parse() factory.
    final CSVFormat format = CSVFormat.EXCEL.withHeader();
    try (final BOMInputStream inputStream = createBOMInputStream("CSVFileParser/bom.csv");
        final CSVParser parser = CSVParser.parse(inputStream, UTF_8, format)) {
        for (final CSVRecord row : parser) {
            assertNotNull(row.get("Date"));
        }
    }
}
@Test
public void testBOMInputStream_ParserWithReader() throws IOException {
    // Same data as the InputStream variant, but decoded by a Reader and passed to the constructor.
    final CSVFormat format = CSVFormat.EXCEL.withHeader();
    try (final Reader reader = new InputStreamReader(createBOMInputStream("CSVFileParser/bom.csv"), UTF_8_NAME);
        final CSVParser parser = new CSVParser(reader, format)) {
        for (final CSVRecord row : parser) {
            assertNotNull(row.get("Date"));
        }
    }
}
@Test
public void testBOMInputStream_parseWithReader() throws IOException {
    // Same data as the constructor variant, but going through the Reader-based parse() factory.
    final CSVFormat format = CSVFormat.EXCEL.withHeader();
    try (final Reader reader = new InputStreamReader(createBOMInputStream("CSVFileParser/bom.csv"), UTF_8_NAME);
        final CSVParser parser = CSVParser.parse(reader, format)) {
        for (final CSVRecord row : parser) {
            assertNotNull(row.get("Date"));
        }
    }
}
@Test
public void testCarriageReturnEndings() throws IOException {
    // Records are separated by bare carriage returns; four records are expected.
    final String input = "foo\rbaar,\rhello,world\r,kanu";
    try (final CSVParser parser = CSVParser.parse(input, CSVFormat.DEFAULT)) {
        final int recordCount = parser.getRecords().size();
        assertEquals(4, recordCount);
    }
}
@Test
public void testCarriageReturnLineFeedEndings() throws IOException {
    // Records are separated by CR LF pairs; four records are expected.
    final String input = "foo\r\nbaar,\r\nhello,world\r\n,kanu";
    try (final CSVParser parser = CSVParser.parse(input, CSVFormat.DEFAULT)) {
        final int recordCount = parser.getRecords().size();
        assertEquals(4, recordCount);
    }
}
@Test
public void testClose() throws Exception {
    final CSVFormat format = CSVFormat.DEFAULT.withCommentMarker('#').withHeader();
    final Reader source = new StringReader("# comment\na,b,c\n1,2,3\nx,y,z");
    final Iterator<CSVRecord> iterator;
    try (final CSVParser parser = format.parse(source)) {
        iterator = parser.iterator();
        // While the parser is open, records are available.
        assertTrue(iterator.hasNext());
    }
    // Once the parser has been closed, the same iterator must report exhaustion.
    assertFalse(iterator.hasNext());
    assertThrows(NoSuchElementException.class, () -> iterator.next());
}
@Test
public void testCSV57() throws Exception {
    // Empty input must produce an empty, non-null record list.
    try (final CSVParser parser = CSVParser.parse("", CSVFormat.DEFAULT)) {
        final List<CSVRecord> parsed = parser.getRecords();
        assertNotNull(parsed);
        assertEquals(0, parsed.size());
    }
}
@Test
public void testDefaultFormat() throws IOException {
    final String input = "a,b#\n" // 1)
        + "\"\n\",\" \",#\n" // 2)
        + "#,\"\"\n" // 3)
        + "# Final comment\n"; // 4)
    // Expected rows when '#' has no special meaning.
    final String[][] expectedPlain = {
        { "a", "b#" },
        { "\n", " ", "#" },
        { "#", "" },
        { "# Final comment" } };
    // Expected rows once '#' is configured as the comment marker.
    final String[][] expectedWithComments = {
        { "a", "b#" },
        { "\n", " ", "#" } };

    final CSVFormat plainFormat = CSVFormat.DEFAULT;
    assertFalse(plainFormat.isCommentMarkerSet());
    try (final CSVParser parser = CSVParser.parse(input, plainFormat)) {
        final List<CSVRecord> parsed = parser.getRecords();
        assertFalse(parsed.isEmpty());
        Utils.compare("Failed to parse without comments", expectedPlain, parsed);
    }

    final CSVFormat commentFormat = CSVFormat.DEFAULT.withCommentMarker('#');
    try (final CSVParser parser = CSVParser.parse(input, commentFormat)) {
        final List<CSVRecord> parsed = parser.getRecords();
        Utils.compare("Failed to parse with comments", expectedWithComments, parsed);
    }
}
@Test
public void testDuplicateHeadersNotAllowed() {
    // Column "a" appears twice in the header row, which must be rejected when duplicates are disallowed.
    assertThrows(IllegalArgumentException.class, () -> {
        CSVParser.parse("a,b,a\n1,2,3\nx,y,z",
            CSVFormat.DEFAULT.withHeader().withAllowDuplicateHeaderNames(false));
    });
}
/**
 * Verifies that a header row with a repeated column name is accepted when the format does not
 * explicitly disallow duplicates.
 *
 * @throws Exception if parsing fails, which is the failure condition of this test
 */
@Test
public void testDuplicateHeadersAllowedByDefault() throws Exception {
    // The parser is opened in try-with-resources so it is closed like every other parser in this class.
    try (final CSVParser parser = CSVParser.parse("a,b,a\n1,2,3\nx,y,z", CSVFormat.DEFAULT.withHeader())) {
        // Getting here without an exception is the expectation.
        assertNotNull(parser);
    }
}
@Test
public void testEmptyFileHeaderParsing() throws Exception {
    // Empty input with "first record as header": no record and no header names.
    final CSVFormat format = CSVFormat.DEFAULT.withFirstRecordAsHeader();
    try (final CSVParser parser = CSVParser.parse("", format)) {
        final CSVRecord first = parser.nextRecord();
        assertNull(first);
        assertTrue(parser.getHeaderNames().isEmpty());
    }
}
@Test
public void testEmptyFile() throws Exception {
    // Empty input yields no record at all.
    try (final CSVParser parser = CSVParser.parse("", CSVFormat.DEFAULT)) {
        final CSVRecord first = parser.nextRecord();
        assertNull(first);
    }
}
@Test
public void testEmptyLineBehaviorCSV() throws Exception {
    final String[] inputs = {
        "hello,\r\n\r\n\r\n",
        "hello,\n\n\n",
        "hello,\"\"\r\n\r\n\r\n",
        "hello,\"\"\n\n\n" };
    // CSV format ignores empty lines
    final String[][] expected = { { "hello", "" } };
    for (final String input : inputs) {
        try (final CSVParser parser = CSVParser.parse(input, CSVFormat.DEFAULT)) {
            final List<CSVRecord> parsed = parser.getRecords();
            assertEquals(expected.length, parsed.size());
            assertFalse(parsed.isEmpty());
            for (int row = 0; row < expected.length; row++) {
                assertArrayEquals(expected[row], parsed.get(row).values());
            }
        }
    }
}
@Test
public void testEmptyLineBehaviorExcel() throws Exception {
    final String[] inputs = {
        "hello,\r\n\r\n\r\n",
        "hello,\n\n\n",
        "hello,\"\"\r\n\r\n\r\n",
        "hello,\"\"\n\n\n" };
    // Excel format does not ignore empty lines
    final String[][] expected = {
        { "hello", "" },
        { "" },
        { "" } };
    for (final String input : inputs) {
        try (final CSVParser parser = CSVParser.parse(input, CSVFormat.EXCEL)) {
            final List<CSVRecord> parsed = parser.getRecords();
            assertEquals(expected.length, parsed.size());
            assertFalse(parsed.isEmpty());
            for (int row = 0; row < expected.length; row++) {
                assertArrayEquals(expected[row], parsed.get(row).values());
            }
        }
    }
}
@Test
public void testEndOfFileBehaviorCSV() throws Exception {
    // Variations of how the last record ends: with or without a terminator, quoted or unquoted last field.
    final String[] inputs = {
        "hello,\r\n\r\nworld,\r\n",
        "hello,\r\n\r\nworld,",
        "hello,\r\n\r\nworld,\"\"\r\n",
        "hello,\r\n\r\nworld,\"\"",
        "hello,\r\n\r\nworld,\n",
        "hello,\r\n\r\nworld,",
        "hello,\r\n\r\nworld,\"\"\n",
        "hello,\r\n\r\nworld,\"\"" };
    // CSV format ignores empty lines
    final String[][] expected = {
        { "hello", "" },
        { "world", "" } };
    for (final String input : inputs) {
        try (final CSVParser parser = CSVParser.parse(input, CSVFormat.DEFAULT)) {
            final List<CSVRecord> parsed = parser.getRecords();
            assertEquals(expected.length, parsed.size());
            assertFalse(parsed.isEmpty());
            for (int row = 0; row < expected.length; row++) {
                assertArrayEquals(expected[row], parsed.get(row).values());
            }
        }
    }
}
@Test
public void testEndOfFileBehaviorExcel() throws Exception {
    // Variations of how the last record ends: with or without a terminator, quoted or unquoted last field.
    final String[] inputs = {
        "hello,\r\n\r\nworld,\r\n",
        "hello,\r\n\r\nworld,",
        "hello,\r\n\r\nworld,\"\"\r\n",
        "hello,\r\n\r\nworld,\"\"",
        "hello,\r\n\r\nworld,\n",
        "hello,\r\n\r\nworld,",
        "hello,\r\n\r\nworld,\"\"\n",
        "hello,\r\n\r\nworld,\"\"" };
    // Excel format does not ignore empty lines
    final String[][] expected = {
        { "hello", "" },
        { "" },
        { "world", "" } };
    for (final String input : inputs) {
        try (final CSVParser parser = CSVParser.parse(input, CSVFormat.EXCEL)) {
            final List<CSVRecord> parsed = parser.getRecords();
            assertEquals(expected.length, parsed.size());
            assertFalse(parsed.isEmpty());
            for (int row = 0; row < expected.length; row++) {
                assertArrayEquals(expected[row], parsed.get(row).values());
            }
        }
    }
}
@Test
public void testExcelFormat1() throws IOException {
    final String input = "value1,value2,value3,value4\r\na,b,c,d\r\n x,,,"
        + "\r\n\r\n\"\"\"hello\"\"\",\" \"\"world\"\"\",\"abc\ndef\",\r\n";
    final String[][] expected = {
        { "value1", "value2", "value3", "value4" },
        { "a", "b", "c", "d" },
        { " x", "", "", "" },
        { "" },
        { "\"hello\"", " \"world\"", "abc\ndef", "" } };
    try (final CSVParser parser = CSVParser.parse(input, CSVFormat.EXCEL)) {
        final List<CSVRecord> parsed = parser.getRecords();
        assertEquals(expected.length, parsed.size());
        assertFalse(parsed.isEmpty());
        for (int row = 0; row < expected.length; row++) {
            assertArrayEquals(expected[row], parsed.get(row).values());
        }
    }
}
@Test
public void testExcelFormat2() throws Exception {
    final String input = "foo,baar\r\n\r\nhello,\r\n\r\nworld,\r\n";
    // The blank lines in the input are expected to come back as single-empty-field records.
    final String[][] expected = {
        { "foo", "baar" },
        { "" },
        { "hello", "" },
        { "" },
        { "world", "" } };
    try (final CSVParser parser = CSVParser.parse(input, CSVFormat.EXCEL)) {
        final List<CSVRecord> parsed = parser.getRecords();
        assertEquals(expected.length, parsed.size());
        assertFalse(parsed.isEmpty());
        for (int row = 0; row < expected.length; row++) {
            assertArrayEquals(expected[row], parsed.get(row).values());
        }
    }
}
/**
 * Tests an exported Excel worksheet with a header row and rows that have more columns than the headers.
 *
 * @throws Exception if the input cannot be parsed
 */
@Test
public void testExcelHeaderCountLessThanData() throws Exception {
    // The header row names only A, B and C; its last two columns are blank.
    final String input = "A,B,C,,\r\na,b,c,d,e\r\n";
    final CSVFormat format = CSVFormat.EXCEL.withHeader();
    try (final CSVParser parser = CSVParser.parse(input, format)) {
        final List<CSVRecord> rows = parser.getRecords();
        for (final CSVRecord row : rows) {
            assertEquals("a", row.get("A"));
            assertEquals("b", row.get("B"));
            assertEquals("c", row.get("C"));
        }
    }
}
@Test
public void testFirstEndOfLineCr() throws IOException {
    // After a full parse, the first line ending seen must be reported as a bare CR.
    final String input = "foo\rbaar,\rhello,world\r,kanu";
    try (final CSVParser parser = CSVParser.parse(input, CSVFormat.DEFAULT)) {
        final int recordCount = parser.getRecords().size();
        assertEquals(4, recordCount);
        assertEquals("\r", parser.getFirstEndOfLine());
    }
}
@Test
public void testFirstEndOfLineCrLf() throws IOException {
    // After a full parse, the first line ending seen must be reported as CR LF.
    final String input = "foo\r\nbaar,\r\nhello,world\r\n,kanu";
    try (final CSVParser parser = CSVParser.parse(input, CSVFormat.DEFAULT)) {
        final int recordCount = parser.getRecords().size();
        assertEquals(4, recordCount);
        assertEquals("\r\n", parser.getFirstEndOfLine());
    }
}
@Test
public void testFirstEndOfLineLf() throws IOException {
    // After a full parse, the first line ending seen must be reported as a bare LF.
    final String input = "foo\nbaar,\nhello,world\n,kanu";
    try (final CSVParser parser = CSVParser.parse(input, CSVFormat.DEFAULT)) {
        final int recordCount = parser.getRecords().size();
        assertEquals(4, recordCount);
        assertEquals("\n", parser.getFirstEndOfLine());
    }
}
/**
 * Verifies that a parser can be consumed with an enhanced for loop and yields every record in order.
 *
 * @throws Exception if the input cannot be parsed
 */
@Test
public void testForEach() throws Exception {
    final List<CSVRecord> records = new ArrayList<>();
    // The parser is now a managed resource alongside the reader, matching the other tests in this class.
    try (final Reader in = new StringReader("a,b,c\n1,2,3\nx,y,z");
        final CSVParser parser = CSVFormat.DEFAULT.parse(in)) {
        for (final CSVRecord record : parser) {
            records.add(record);
        }
        assertEquals(3, records.size());
        assertArrayEquals(new String[] { "a", "b", "c" }, records.get(0).values());
        assertArrayEquals(new String[] { "1", "2", "3" }, records.get(1).values());
        assertArrayEquals(new String[] { "x", "y", "z" }, records.get(2).values());
    }
}
@Test
public void testGetHeaderMap() throws Exception {
    final CSVFormat format = CSVFormat.DEFAULT.withHeader("A", "B", "C");
    try (final CSVParser parser = CSVParser.parse("a,b,c\n1,2,3\nx,y,z", format)) {
        final Map<String, Integer> headerMap = parser.getHeaderMap();
        // Headers are iterated in column order.
        final Iterator<String> names = headerMap.keySet().iterator();
        for (final String expectedName : new String[] { "A", "B", "C" }) {
            assertEquals(expectedName, names.next());
        }
        // Parse to make sure getHeaderMap did not have a side-effect.
        final Iterator<CSVRecord> rows = parser.iterator();
        int remaining = 3;
        while (remaining-- > 0) {
            assertTrue(rows.hasNext());
            final CSVRecord row = rows.next();
            assertEquals(row.get(0), row.get("A"));
            assertEquals(row.get(1), row.get("B"));
            assertEquals(row.get(2), row.get("C"));
        }
        assertFalse(rows.hasNext());
    }
}
@Test
public void testGetHeaderNames() throws IOException {
    final CSVFormat format = CSVFormat.DEFAULT.withHeader("A", "B", "C");
    try (final CSVParser parser = CSVParser.parse("a,b,c\n1,2,3\nx,y,z", format)) {
        final Map<String, Integer> indexByName = parser.getHeaderMap();
        final List<String> names = parser.getHeaderNames();
        assertNotNull(names);
        assertEquals(indexByName.size(), names.size());
        // Each name's position in the list must equal the column index the header map records for it.
        int position = 0;
        for (final String name : names) {
            assertEquals(position, indexByName.get(name).intValue());
            position++;
        }
    }
}
@Test
public void testGetHeaderNamesReadOnly() throws IOException {
    final CSVFormat format = CSVFormat.DEFAULT.withHeader("A", "B", "C");
    try (final CSVParser parser = CSVParser.parse("a,b,c\n1,2,3\nx,y,z", format)) {
        final List<String> names = parser.getHeaderNames();
        assertNotNull(names);
        // Any attempt to modify the returned list must be rejected.
        assertThrows(UnsupportedOperationException.class, () -> {
            names.add("This is a read-only list.");
        });
    }
}
@Test
public void testGetLine() throws IOException {
    final CSVFormat format = CSVFormat.DEFAULT.withIgnoreSurroundingSpaces();
    try (final CSVParser parser = CSVParser.parse(CSV_INPUT, format)) {
        // Pull the records one at a time and compare each with the matching RESULT row.
        for (int row = 0; row < RESULT.length; row++) {
            final CSVRecord next = parser.nextRecord();
            assertArrayEquals(RESULT[row], next.values());
        }
        // Nothing may remain after the expected rows.
        assertNull(parser.nextRecord());
    }
}
@Test
public void testGetLineNumberWithCR() throws Exception {
    // Runs the shared line-number validation with a bare CR as line separator.
    final String lineSeparator = String.valueOf(CR);
    validateLineNumbers(lineSeparator);
}
@Test
public void testGetLineNumberWithCRLF() throws Exception {
    // Runs the shared line-number validation with CR LF as line separator.
    final String lineSeparator = CRLF;
    validateLineNumbers(lineSeparator);
}
@Test
public void testGetLineNumberWithLF() throws Exception {
    // Runs the shared line-number validation with a bare LF as line separator.
    final String lineSeparator = String.valueOf(LF);
    validateLineNumbers(lineSeparator);
}
@Test
public void testGetOneLine() throws IOException {
    // A single-line input must parse to the first expected row.
    try (final CSVParser parser = CSVParser.parse(CSV_INPUT_1, CSVFormat.DEFAULT)) {
        final List<CSVRecord> parsed = parser.getRecords();
        final CSVRecord first = parsed.get(0);
        assertArrayEquals(RESULT[0], first.values());
    }
}
/**
 * Tests reusing a parser to process new string records one at a time as they are being discovered. See [CSV-110].
 *
 * @throws IOException if writing to or reading from the pipe fails
 */
@Test
public void testGetOneLineOneParser() throws IOException {
    final CSVFormat format = CSVFormat.DEFAULT;
    try (final PipedWriter writer = new PipedWriter();
        final CSVParser parser = new CSVParser(new PipedReader(writer), format)) {
        // Each record is written, terminator included, before the parser is asked for it.
        writer.append(CSV_INPUT_1).append(format.getRecordSeparator());
        final CSVRecord first = parser.nextRecord();
        assertArrayEquals(RESULT[0], first.values());

        writer.append(CSV_INPUT_2).append(format.getRecordSeparator());
        final CSVRecord second = parser.nextRecord();
        assertArrayEquals(RESULT[1], second.values());
    }
}
@Test
public void testGetRecordNumberWithCR() throws Exception {
    // Runs the shared record-number validation with a bare CR as line separator.
    final String lineSeparator = String.valueOf(CR);
    validateRecordNumbers(lineSeparator);
}
@Test
public void testGetRecordNumberWithCRLF() throws Exception {
    // Runs the shared record-number validation with CR LF as line separator.
    final String lineSeparator = CRLF;
    validateRecordNumbers(lineSeparator);
}
@Test
public void testGetRecordNumberWithLF() throws Exception {
    // Runs the shared record-number validation with a bare LF as line separator.
    final String lineSeparator = String.valueOf(LF);
    validateRecordNumbers(lineSeparator);
}
@Test
public void testGetRecordPositionWithCRLF() throws Exception {
    // Runs the shared record-position validation with CR LF as line separator.
    final String lineSeparator = CRLF;
    validateRecordPosition(lineSeparator);
}
@Test
public void testGetRecordPositionWithLF() throws Exception {
    // Runs the shared record-position validation with a bare LF as line separator.
    final String lineSeparator = String.valueOf(LF);
    validateRecordPosition(lineSeparator);
}
@Test
public void testGetRecords() throws IOException {
    final CSVFormat format = CSVFormat.DEFAULT.withIgnoreSurroundingSpaces();
    try (final CSVParser parser = CSVParser.parse(CSV_INPUT, format)) {
        // Read everything in one call and compare row by row with RESULT.
        final List<CSVRecord> parsed = parser.getRecords();
        assertEquals(RESULT.length, parsed.size());
        assertFalse(parsed.isEmpty());
        for (int row = 0; row < RESULT.length; row++) {
            assertArrayEquals(RESULT[row], parsed.get(row).values());
        }
    }
}
@Test
public void testGetRecordWithMultiLineValues() throws Exception {
    // Three records, each with two quoted fields that contain an embedded CR LF.
    final String input = "\"a\r\n1\",\"a\r\n2\"" + CRLF + "\"b\r\n1\",\"b\r\n2\"" + CRLF + "\"c\r\n1\",\"c\r\n2\"";
    final CSVFormat format = CSVFormat.DEFAULT.withRecordSeparator(CRLF);
    try (final CSVParser parser = CSVParser.parse(input, format)) {
        // Before anything is read, both counters are zero.
        assertEquals(0, parser.getRecordNumber());
        assertEquals(0, parser.getCurrentLineNumber());

        CSVRecord current = parser.nextRecord();
        assertNotNull(current);
        assertEquals(3, parser.getCurrentLineNumber());
        assertEquals(1, current.getRecordNumber());
        assertEquals(1, parser.getRecordNumber());

        current = parser.nextRecord();
        assertNotNull(current);
        assertEquals(6, parser.getCurrentLineNumber());
        assertEquals(2, current.getRecordNumber());
        assertEquals(2, parser.getRecordNumber());

        current = parser.nextRecord();
        assertNotNull(current);
        assertEquals(8, parser.getCurrentLineNumber());
        assertEquals(3, current.getRecordNumber());
        assertEquals(3, parser.getRecordNumber());

        // Reading past the end returns null and leaves both counters unchanged.
        current = parser.nextRecord();
        assertNull(current);
        assertEquals(8, parser.getCurrentLineNumber());
        assertEquals(3, parser.getRecordNumber());
    }
}
/**
 * Verifies that, with the header taken from the first row, each remaining record can be read
 * by column name as well as by index.
 *
 * @throws Exception if the input cannot be parsed
 */
@Test
public void testHeader() throws Exception {
    final Reader in = new StringReader("a,b,c\n1,2,3\nx,y,z");
    // The parser is held in try-with-resources so it is closed, as in the other tests of this class.
    try (final CSVParser parser = CSVFormat.DEFAULT.withHeader().parse(in)) {
        final Iterator<CSVRecord> records = parser.iterator();
        // Two data rows are expected; the first input line is consumed as the header.
        for (int i = 0; i < 2; i++) {
            assertTrue(records.hasNext());
            final CSVRecord record = records.next();
            assertEquals(record.get(0), record.get("a"));
            assertEquals(record.get(1), record.get("b"));
            assertEquals(record.get(2), record.get("c"));
        }
        assertFalse(records.hasNext());
    }
}
/**
 * Verifies header handling when the input starts with a comment line: the records after the
 * header row must be readable by column name as well as by index.
 *
 * @throws Exception if the input cannot be parsed
 */
@Test
public void testHeaderComment() throws Exception {
    final Reader in = new StringReader("# comment\na,b,c\n1,2,3\nx,y,z");
    // The parser is held in try-with-resources so it is closed, as in the other tests of this class.
    try (final CSVParser parser = CSVFormat.DEFAULT.withCommentMarker('#').withHeader().parse(in)) {
        final Iterator<CSVRecord> records = parser.iterator();
        // Two data rows are expected after the comment and the header row.
        for (int i = 0; i < 2; i++) {
            assertTrue(records.hasNext());
            final CSVRecord record = records.next();
            assertEquals(record.get(0), record.get("a"));
            assertEquals(record.get(1), record.get("b"));
            assertEquals(record.get(2), record.get("c"));
        }
        assertFalse(records.hasNext());
    }
}
/**
 * Verifies that a header row with one blank column name is accepted when missing column names
 * are allowed, and that the named columns remain accessible.
 *
 * @throws Exception if the input cannot be parsed
 */
@Test
public void testHeaderMissing() throws Exception {
    final Reader in = new StringReader("a,,c\n1,2,3\nx,y,z");
    // The parser is held in try-with-resources so it is closed, as in the other tests of this class.
    try (final CSVParser parser = CSVFormat.DEFAULT.withHeader().withAllowMissingColumnNames().parse(in)) {
        final Iterator<CSVRecord> records = parser.iterator();
        for (int i = 0; i < 2; i++) {
            assertTrue(records.hasNext());
            final CSVRecord record = records.next();
            // Only the named columns can be looked up by name; the middle column has no name.
            assertEquals(record.get(0), record.get("a"));
            assertEquals(record.get(2), record.get("c"));
        }
        assertFalse(records.hasNext());
    }
}
/**
 * Verifies that several blank header names are accepted when missing column names are allowed
 * and the empty string is configured as the null string.
 *
 * @throws Exception if the input cannot be parsed, which is the failure condition of this test
 */
@Test
public void testHeaderMissingWithNull() throws Exception {
    final Reader in = new StringReader("a,,c,,e\n1,2,3,4,5\nv,w,x,y,z");
    // The parser is held in try-with-resources so it is closed, as in the other tests of this class.
    try (final CSVParser parser = CSVFormat.DEFAULT.withHeader().withNullString("").withAllowMissingColumnNames()
        .parse(in)) {
        // Completing without an exception is the expectation.
        parser.iterator();
    }
}
/**
 * Verifies that several blank header names are accepted when missing column names are allowed.
 *
 * @throws Exception if the input cannot be parsed, which is the failure condition of this test
 */
@Test
public void testHeadersMissing() throws Exception {
    final Reader in = new StringReader("a,,c,,e\n1,2,3,4,5\nv,w,x,y,z");
    // The parser is held in try-with-resources so it is closed, as in the other tests of this class.
    try (final CSVParser parser = CSVFormat.DEFAULT.withHeader().withAllowMissingColumnNames().parse(in)) {
        // Completing without an exception is the expectation.
        parser.iterator();
    }
}
@Test
public void testHeadersMissingException() {
    // Two header names are blank and missing column names are not allowed, so parsing must be rejected.
    final Reader source = new StringReader("a,,c,,e\n1,2,3,4,5\nv,w,x,y,z");
    assertThrows(IllegalArgumentException.class, () -> {
        CSVFormat.DEFAULT.withHeader().parse(source).iterator();
    });
}
@Test
public void testHeadersMissingOneColumnException() throws Exception {
    // A single blank header name is enough to be rejected when missing column names are not allowed.
    final Reader source = new StringReader("a,,c,d,e\n1,2,3,4,5\nv,w,x,y,z");
    assertThrows(IllegalArgumentException.class, () -> {
        CSVFormat.DEFAULT.withHeader().parse(source).iterator();
    });
}
/**
 * Verifies that a header cell equal to the configured null string is left out of both the header
 * names and the header map.
 *
 * @throws IOException if the input cannot be parsed
 */
@Test
public void testHeadersWithNullColumnName() throws IOException {
    final Reader in = new StringReader("header1,null,header3\n1,2,3\n4,5,6");
    final CSVFormat format = CSVFormat.DEFAULT
        .withHeader()
        .withNullString("null")
        .withAllowMissingColumnNames();
    // The parser is held in try-with-resources so it is closed, as in the other tests of this class.
    try (final CSVParser parser = format.parse(in)) {
        final Iterator<CSVRecord> records = parser.iterator();
        final CSVRecord record = records.next();
        // Expect the null header to be missing
        assertEquals(Arrays.asList("header1", "header3"), record.getParser().getHeaderNames());
        assertEquals(2, record.getParser().getHeaderMap().size());
    }
}
/**
 * Verifies that, with header case ignored, columns can be looked up using a different letter
 * case than the one the header was declared with.
 *
 * @throws Exception if the input cannot be parsed
 */
@Test
public void testIgnoreCaseHeaderMapping() throws Exception {
    final Reader reader = new StringReader("1,2,3");
    final CSVFormat format = CSVFormat.DEFAULT.withHeader("One", "TWO", "three").withIgnoreHeaderCase();
    // The parser is held in try-with-resources so it is closed, as in the other tests of this class.
    try (final CSVParser parser = format.parse(reader)) {
        final Iterator<CSVRecord> records = parser.iterator();
        final CSVRecord record = records.next();
        assertEquals("1", record.get("one"));
        assertEquals("2", record.get("two"));
        assertEquals("3", record.get("THREE"));
    }
}
@Test
public void testIgnoreEmptyLines() throws IOException {
    // Blank lines are interleaved with three non-empty lines; only those three may be returned.
    final String input = "\nfoo,baar\n\r\n,\n\n,world\r\n\n";
    try (final CSVParser parser = CSVParser.parse(input, CSVFormat.DEFAULT)) {
        final int recordCount = parser.getRecords().size();
        assertEquals(3, recordCount);
    }
}
@Test
public void testInvalidFormat() {
    // CR must be rejected as a delimiter.
    assertThrows(IllegalArgumentException.class, () -> {
        CSVFormat.DEFAULT.withDelimiter(CR);
    });
}
/**
 * Exercises the iterator contract of the parser: remove() is unsupported, repeated hasNext()
 * calls do not consume records, and next() past the end throws.
 *
 * @throws Exception if the input cannot be parsed
 */
@Test
public void testIterator() throws Exception {
    final Reader in = new StringReader("a,b,c\n1,2,3\nx,y,z");
    // The parser is held in try-with-resources so it is closed, as in the other tests of this class.
    try (final CSVParser parser = CSVFormat.DEFAULT.parse(in)) {
        final Iterator<CSVRecord> iterator = parser.iterator();
        assertTrue(iterator.hasNext());
        assertThrows(UnsupportedOperationException.class, iterator::remove);
        assertArrayEquals(new String[] { "a", "b", "c" }, iterator.next().values());
        assertArrayEquals(new String[] { "1", "2", "3" }, iterator.next().values());
        // hasNext() is called several times on purpose: it must not advance the iterator.
        assertTrue(iterator.hasNext());
        assertTrue(iterator.hasNext());
        assertTrue(iterator.hasNext());
        assertArrayEquals(new String[] { "x", "y", "z" }, iterator.next().values());
        assertFalse(iterator.hasNext());
        assertThrows(NoSuchElementException.class, iterator::next);
    }
}
@Test
public void testIteratorSequenceBreaking() throws IOException {
    // Each row holds its own 1-based record number, so any skipped record shows up as a mismatch.
    final String fiveRows = "1\n2\n3\n4\n5\n";

    // Iterator hasNext() shouldn't break sequence
    try (CSVParser parser = CSVFormat.DEFAULT.parse(new StringReader(fiveRows))) {
        final Iterator<CSVRecord> rows = parser.iterator();
        int seen = 0;
        while (rows.hasNext()) {
            final CSVRecord row = rows.next();
            seen++;
            assertEquals(String.valueOf(seen), row.get(0));
            if (seen >= 2) {
                break;
            }
        }
        // Peek once, then carry on with the same iterator.
        rows.hasNext();
        while (rows.hasNext()) {
            final CSVRecord row = rows.next();
            seen++;
            assertEquals(String.valueOf(seen), row.get(0));
        }
    }

    // Consecutive enhanced for loops shouldn't break sequence
    try (CSVParser parser = CSVFormat.DEFAULT.parse(new StringReader(fiveRows))) {
        int seen = 0;
        for (final CSVRecord row : parser) {
            seen++;
            assertEquals(String.valueOf(seen), row.get(0));
            if (seen >= 2) {
                break;
            }
        }
        for (final CSVRecord row : parser) {
            seen++;
            assertEquals(String.valueOf(seen), row.get(0));
        }
    }

    // Consecutive enhanced for loops with hasNext() peeking shouldn't break sequence
    try (CSVParser parser = CSVFormat.DEFAULT.parse(new StringReader(fiveRows))) {
        int seen = 0;
        for (final CSVRecord row : parser) {
            seen++;
            assertEquals(String.valueOf(seen), row.get(0));
            if (seen >= 2) {
                break;
            }
        }
        // Peek through a freshly requested iterator between the two loops.
        parser.iterator().hasNext();
        for (final CSVRecord row : parser) {
            seen++;
            assertEquals(String.valueOf(seen), row.get(0));
        }
    }
}
@Test
public void testLineFeedEndings() throws IOException {
    // Records are separated by bare line feeds; four records are expected.
    final String input = "foo\nbaar,\nhello,world\n,kanu";
    try (final CSVParser parser = CSVParser.parse(input, CSVFormat.DEFAULT)) {
        final int recordCount = parser.getRecords().size();
        assertEquals(4, recordCount);
    }
}
/**
 * Verifies the difference between a column being mapped (named in the header) and being set
 * (present in a given record) when a record has fewer values than the header has names.
 *
 * @throws Exception if the input cannot be parsed
 */
@Test
public void testMappedButNotSetAsOutlook2007ContactExport() throws Exception {
    final Reader in = new StringReader("a,b,c\n1,2\nx,y,z");
    final CSVFormat format = CSVFormat.DEFAULT.withHeader("A", "B", "C").withSkipHeaderRecord();
    // The parser is held in try-with-resources so it is closed, as in the other tests of this class.
    try (final CSVParser parser = format.parse(in)) {
        final Iterator<CSVRecord> records = parser.iterator();
        CSVRecord record;
        // 1st record: only two values, so "C" is mapped but not set.
        record = records.next();
        assertTrue(record.isMapped("A"));
        assertTrue(record.isMapped("B"));
        assertTrue(record.isMapped("C"));
        assertTrue(record.isSet("A"));
        assertTrue(record.isSet("B"));
        assertFalse(record.isSet("C"));
        assertEquals("1", record.get("A"));
        assertEquals("2", record.get("B"));
        assertFalse(record.isConsistent());
        // 2nd record: all three values present.
        record = records.next();
        assertTrue(record.isMapped("A"));
        assertTrue(record.isMapped("B"));
        assertTrue(record.isMapped("C"));
        assertTrue(record.isSet("A"));
        assertTrue(record.isSet("B"));
        assertTrue(record.isSet("C"));
        assertEquals("x", record.get("A"));
        assertEquals("y", record.get("B"));
        assertEquals("z", record.get("C"));
        assertTrue(record.isConsistent());
        assertFalse(records.hasNext());
    }
}
@Test
@Disabled
public void testMongoDbCsv() throws Exception {
    final String input = "\"a a\",b,c" + LF + "d,e,f";
    try (final CSVParser parser = CSVParser.parse(input, CSVFormat.MONGODB_CSV)) {
        // Two iterators are requested up front; the first row is read through one, the second through the other.
        final Iterator<CSVRecord> firstIterator = parser.iterator();
        final Iterator<CSVRecord> secondIterator = parser.iterator();

        final CSVRecord firstRow = firstIterator.next();
        assertEquals("a a", firstRow.get(0));
        assertEquals("b", firstRow.get(1));
        assertEquals("c", firstRow.get(2));

        final CSVRecord secondRow = secondIterator.next();
        assertEquals("d", secondRow.get(0));
        assertEquals("e", secondRow.get(1));
        assertEquals("f", secondRow.get(2));
    }
}
@Test
// TODO this may lead to strange behavior, throw an exception if iterator() has already been called?
public void testMultipleIterators() throws Exception {
    final String input = "a,b,c" + CRLF + "d,e,f";
    try (final CSVParser parser = CSVParser.parse(input, CSVFormat.DEFAULT)) {
        final Iterator<CSVRecord> rows = parser.iterator();

        final CSVRecord firstRow = rows.next();
        assertEquals("a", firstRow.get(0));
        assertEquals("b", firstRow.get(1));
        assertEquals("c", firstRow.get(2));

        final CSVRecord secondRow = rows.next();
        assertEquals("d", secondRow.get(0));
        assertEquals("e", secondRow.get(1));
        assertEquals("f", secondRow.get(2));
    }
}
@Test
public void testNewCSVParserNullReaderFormat() {
    // A null reader must be rejected by the constructor.
    assertThrows(IllegalArgumentException.class, () -> {
        new CSVParser(null, CSVFormat.DEFAULT);
    });
}
@Test
public void testNewCSVParserReaderNullFormat() {
    // A null format must be rejected by the constructor.
    assertThrows(IllegalArgumentException.class, () -> {
        new CSVParser(new StringReader(""), null);
    });
}
@Test
public void testNoHeaderMap() throws Exception {
    // No header is configured on the format, so the parser must not expose a header map.
    final String input = "a,b,c\n1,2,3\nx,y,z";
    try (final CSVParser parser = CSVParser.parse(input, CSVFormat.DEFAULT)) {
        final Map<String, Integer> headerMap = parser.getHeaderMap();
        assertNull(headerMap);
    }
}
@Test
public void testParse() throws Exception {
    // Parses the same resource through every factory method and constructor in turn.
    final URL url = ClassLoader.getSystemClassLoader().getResource("CSVFileParser/test.csv");
    final CSVFormat format = CSVFormat.DEFAULT.withHeader("A", "B", "C", "D");
    final Charset charset = StandardCharsets.UTF_8;
    final Path path = Paths.get(url.toURI());
    final File file = new File(url.toURI());

    // parse(Reader, CSVFormat)
    try (final CSVParser parser = CSVParser.parse(new InputStreamReader(url.openStream(), charset), format)) {
        parseFully(parser);
    }
    // parse(String, CSVFormat)
    try (final CSVParser parser = CSVParser.parse(new String(Files.readAllBytes(path), charset), format)) {
        parseFully(parser);
    }
    // parse(File, Charset, CSVFormat)
    try (final CSVParser parser = CSVParser.parse(file, charset, format)) {
        parseFully(parser);
    }
    // parse(InputStream, Charset, CSVFormat)
    try (final CSVParser parser = CSVParser.parse(url.openStream(), charset, format)) {
        parseFully(parser);
    }
    // parse(Path, Charset, CSVFormat)
    try (final CSVParser parser = CSVParser.parse(path, charset, format)) {
        parseFully(parser);
    }
    // parse(URL, Charset, CSVFormat)
    try (final CSVParser parser = CSVParser.parse(url, charset, format)) {
        parseFully(parser);
    }
    // new CSVParser(Reader, CSVFormat)
    try (final CSVParser parser = new CSVParser(new InputStreamReader(url.openStream(), charset), format)) {
        parseFully(parser);
    }
    // new CSVParser(Reader, CSVFormat, characterOffset, recordNumber)
    try (final CSVParser parser = new CSVParser(new InputStreamReader(url.openStream(), charset), format,
        /*characterOffset=*/0, /*recordNumber=*/1)) {
        parseFully(parser);
    }
}
/**
 * Asserts that parsing a {@link File} with a {@code null} format throws {@link IllegalArgumentException}.
 */
@Test
public void testParseFileNullFormat() {
    final File csvFile = new File("CSVFileParser/test.csv");
    final CSVFormat missingFormat = null;
    assertThrows(IllegalArgumentException.class,
        () -> CSVParser.parse(csvFile, Charset.defaultCharset(), missingFormat));
}
/**
 * Asserts that parsing a {@code null} {@link File} throws {@link IllegalArgumentException}.
 */
@Test
public void testParseNullFileFormat() {
    final File missingFile = null;
    assertThrows(IllegalArgumentException.class,
        () -> CSVParser.parse(missingFile, Charset.defaultCharset(), CSVFormat.DEFAULT));
}
/**
 * Asserts that parsing a {@code null} {@link Path} throws {@link IllegalArgumentException}.
 */
@Test
public void testParseNullPathFormat() {
    final Path missingPath = null;
    assertThrows(IllegalArgumentException.class,
        () -> CSVParser.parse(missingPath, Charset.defaultCharset(), CSVFormat.DEFAULT));
}
/**
 * Asserts that parsing a {@code null} string throws {@link IllegalArgumentException}.
 */
@Test
public void testParseNullStringFormat() {
    final String missingCsv = null;
    assertThrows(
        IllegalArgumentException.class,
        () -> CSVParser.parse(missingCsv, CSVFormat.DEFAULT));
}
/**
 * Asserts that parsing a {@code null} {@link URL} throws {@link IllegalArgumentException}.
 */
@Test
public void testParseNullUrlCharsetFormat() {
    final URL missingUrl = null;
    assertThrows(IllegalArgumentException.class,
        () -> CSVParser.parse(missingUrl, Charset.defaultCharset(), CSVFormat.DEFAULT));
}
/**
 * Asserts that parsing a {@link URL} with a {@code null} charset throws {@link IllegalArgumentException}.
 */
@Test
public void testParserUrlNullCharsetFormat() {
    final Charset missingCharset = null;
    // The URL is built inside the lambda because its constructor declares a checked exception.
    assertThrows(IllegalArgumentException.class,
        () -> CSVParser.parse(new URL("https://commons.apache.org"), missingCharset, CSVFormat.DEFAULT));
}
/**
 * Asserts that parsing a string with a {@code null} format throws {@link IllegalArgumentException}.
 */
@Test
public void testParseStringNullFormat() {
    final CSVFormat missingFormat = null;
    assertThrows(
        IllegalArgumentException.class,
        () -> CSVParser.parse("csv data", missingFormat));
}
/**
 * Asserts that parsing a {@link URL} with a {@code null} format throws {@link IllegalArgumentException}.
 */
@Test
public void testParseUrlCharsetNullFormat() {
    final CSVFormat missingFormat = null;
    // The URL is built inside the lambda because its constructor declares a checked exception.
    assertThrows(IllegalArgumentException.class,
        () -> CSVParser.parse(new URL("https://commons.apache.org"), Charset.defaultCharset(), missingFormat));
}
/**
 * Parses three lines with an explicitly provided header {@code A, B, C} and checks that all three lines are
 * returned as records whose values are reachable both by index and by header name.
 *
 * @throws Exception if parsing fails.
 */
@Test
public void testProvidedHeader() throws Exception {
    final Reader in = new StringReader("a,b,c\n1,2,3\nx,y,z");
    // try-with-resources closes the parser (and its reader) even when an assertion fails.
    try (final CSVParser parser = CSVFormat.DEFAULT.withHeader("A", "B", "C").parse(in)) {
        final Iterator<CSVRecord> records = parser.iterator();
        for (int i = 0; i < 3; i++) {
            assertTrue(records.hasNext());
            final CSVRecord record = records.next();
            assertTrue(record.isMapped("A"));
            assertTrue(record.isMapped("B"));
            assertTrue(record.isMapped("C"));
            assertFalse(record.isMapped("NOT MAPPED"));
            assertEquals(record.get(0), record.get("A"));
            assertEquals(record.get(1), record.get("B"));
            assertEquals(record.get(2), record.get("C"));
        }
        assertFalse(records.hasNext());
    }
}
/**
 * Parses three lines with an empty {@code withHeader()} call and checks that exactly two records follow, each
 * mapped by the names {@code a, b, c} taken from the first line.
 *
 * @throws Exception if parsing fails.
 */
@Test
public void testProvidedHeaderAuto() throws Exception {
    final Reader in = new StringReader("a,b,c\n1,2,3\nx,y,z");
    // try-with-resources closes the parser (and its reader) even when an assertion fails.
    try (final CSVParser parser = CSVFormat.DEFAULT.withHeader().parse(in)) {
        final Iterator<CSVRecord> records = parser.iterator();
        for (int i = 0; i < 2; i++) {
            assertTrue(records.hasNext());
            final CSVRecord record = records.next();
            assertTrue(record.isMapped("a"));
            assertTrue(record.isMapped("b"));
            assertTrue(record.isMapped("c"));
            assertFalse(record.isMapped("NOT MAPPED"));
            assertEquals(record.get(0), record.get("a"));
            assertEquals(record.get(1), record.get("b"));
            assertEquals(record.get(2), record.get("c"));
        }
        assertFalse(records.hasNext());
    }
}
/**
 * Parses a CRLF-terminated input with {@link CSVFormat#DEFAULT}, prints every record back through a
 * {@link CSVPrinter} and checks that the printed text equals the input.
 *
 * @throws Exception if parsing or printing fails.
 */
@Test
public void testRoundtrip() throws Exception {
    final StringWriter out = new StringWriter();
    final String input = "a,b,c\r\n1,2,3\r\nx,y,z\r\n";
    // Both the printer and the parser are managed resources so neither leaks on failure.
    try (final CSVPrinter printer = new CSVPrinter(out, CSVFormat.DEFAULT);
            final CSVParser parser = CSVParser.parse(input, CSVFormat.DEFAULT)) {
        for (final CSVRecord record : parser) {
            printer.printRecord(record);
        }
        assertEquals(input, out.toString());
    }
}
/**
 * Checks that with an empty {@code withHeader()} call the first returned record is the second input line,
 * addressable by the names from the first line.
 *
 * @throws Exception if parsing fails.
 */
@Test
public void testSkipAutoHeader() throws Exception {
    final Reader in = new StringReader("a,b,c\n1,2,3\nx,y,z");
    // try-with-resources closes the parser (and its reader) even when an assertion fails.
    try (final CSVParser parser = CSVFormat.DEFAULT.withHeader().parse(in)) {
        final Iterator<CSVRecord> records = parser.iterator();
        final CSVRecord record = records.next();
        assertEquals("1", record.get("a"));
        assertEquals("2", record.get("b"));
        assertEquals("3", record.get("c"));
    }
}
/**
 * Checks that an input whose first line repeats the same name ({@code a,a,a}) can be read when the header is
 * overridden with {@code X, Y, Z} and the header record is skipped.
 *
 * @throws Exception if parsing fails.
 */
@Test
public void testSkipHeaderOverrideDuplicateHeaders() throws Exception {
    final Reader in = new StringReader("a,a,a\n1,2,3\nx,y,z");
    // try-with-resources closes the parser (and its reader) even when an assertion fails.
    try (final CSVParser parser = CSVFormat.DEFAULT.withHeader("X", "Y", "Z").withSkipHeaderRecord().parse(in)) {
        final Iterator<CSVRecord> records = parser.iterator();
        final CSVRecord record = records.next();
        assertEquals("1", record.get("X"));
        assertEquals("2", record.get("Y"));
        assertEquals("3", record.get("Z"));
    }
}
/**
 * Checks that with the header set to {@code X, Y, Z} and the header record skipped, the first returned record
 * is the second input line, addressable by the replacement names.
 *
 * @throws Exception if parsing fails.
 */
@Test
public void testSkipSetAltHeaders() throws Exception {
    final Reader in = new StringReader("a,b,c\n1,2,3\nx,y,z");
    // try-with-resources closes the parser (and its reader) even when an assertion fails.
    try (final CSVParser parser = CSVFormat.DEFAULT.withHeader("X", "Y", "Z").withSkipHeaderRecord().parse(in)) {
        final Iterator<CSVRecord> records = parser.iterator();
        final CSVRecord record = records.next();
        assertEquals("1", record.get("X"));
        assertEquals("2", record.get("Y"));
        assertEquals("3", record.get("Z"));
    }
}
/**
 * Checks that with the header set to {@code a, b, c} and the header record skipped, the first returned record
 * is the second input line.
 *
 * @throws Exception if parsing fails.
 */
@Test
public void testSkipSetHeader() throws Exception {
    final Reader in = new StringReader("a,b,c\n1,2,3\nx,y,z");
    // try-with-resources closes the parser (and its reader) even when an assertion fails.
    try (final CSVParser parser = CSVFormat.DEFAULT.withHeader("a", "b", "c").withSkipHeaderRecord().parse(in)) {
        final Iterator<CSVRecord> records = parser.iterator();
        final CSVRecord record = records.next();
        assertEquals("1", record.get("a"));
        assertEquals("2", record.get("b"));
        assertEquals("3", record.get("c"));
    }
}
/**
 * Parses several inputs containing empty lines with {@link CSVFormat#EXCEL} and compares the resulting records
 * with a fixed expectation table. The test is currently disabled.
 */
@Test
@Disabled
public void testStartWithEmptyLinesThenHeaders() throws Exception {
    final String[] inputs = {
        "\r\n\r\n\r\nhello,\r\n\r\n\r\n",
        "hello,\n\n\n",
        "hello,\"\"\r\n\r\n\r\n",
        "hello,\"\"\n\n\n"
    };
    // Excel format does not ignore empty lines
    final String[][] expected = { { "hello", "" }, { "" }, { "" } };
    for (final String input : inputs) {
        try (final CSVParser parser = CSVParser.parse(input, CSVFormat.EXCEL)) {
            final List<CSVRecord> parsed = parser.getRecords();
            assertEquals(expected.length, parsed.size());
            assertFalse(parsed.isEmpty());
            int row = 0;
            for (final String[] expectedValues : expected) {
                assertArrayEquals(expectedValues, parsed.get(row++).values());
            }
        }
    }
}
/**
 * Checks that with {@code withTrailingDelimiter()} a line ending in a delimiter yields a three-value record
 * rather than one with an extra trailing value.
 *
 * @throws Exception if parsing fails.
 */
@Test
public void testTrailingDelimiter() throws Exception {
    final Reader in = new StringReader("a,a,a,\n\"1\",\"2\",\"3\",\nx,y,z,");
    // try-with-resources closes the parser (and its reader) even when an assertion fails.
    try (final CSVParser parser = CSVFormat.DEFAULT.withHeader("X", "Y", "Z").withSkipHeaderRecord()
            .withTrailingDelimiter().parse(in)) {
        final Iterator<CSVRecord> records = parser.iterator();
        final CSVRecord record = records.next();
        assertEquals("1", record.get("X"));
        assertEquals("2", record.get("Y"));
        assertEquals("3", record.get("Z"));
        assertEquals(3, record.size());
    }
}
/**
 * Checks that with {@code withTrim()} quoted values padded with spaces are returned without the surrounding
 * spaces.
 *
 * @throws Exception if parsing fails.
 */
@Test
public void testTrim() throws Exception {
    final Reader in = new StringReader("a,a,a\n\" 1 \",\" 2 \",\" 3 \"\nx,y,z");
    // try-with-resources closes the parser (and its reader) even when an assertion fails.
    try (final CSVParser parser = CSVFormat.DEFAULT.withHeader("X", "Y", "Z").withSkipHeaderRecord()
            .withTrim().parse(in)) {
        final Iterator<CSVRecord> records = parser.iterator();
        final CSVRecord record = records.next();
        assertEquals("1", record.get("X"));
        assertEquals("2", record.get("Y"));
        assertEquals("3", record.get("Z"));
        assertEquals(3, record.size());
    }
}
/**
 * Checks that a header line with a repeated name is reported in full, in input order, by
 * {@code getHeaderNames()} on the parser reached through the record.
 *
 * @throws IOException if parsing fails.
 */
@Test
public void testRepeatedHeadersAreReturnedInCSVRecordHeaderNames() throws IOException {
    final Reader in = new StringReader("header1,header2,header1\n1,2,3\n4,5,6");
    // try-with-resources closes the parser (and its reader) even when an assertion fails.
    try (final CSVParser parser = CSVFormat.DEFAULT.withFirstRecordAsHeader().withTrim().parse(in)) {
        final Iterator<CSVRecord> records = parser.iterator();
        final CSVRecord record = records.next();
        // Deliberately goes through record.getParser() rather than the local parser variable.
        assertEquals(Arrays.asList("header1", "header2", "header1"), record.getParser().getHeaderNames());
    }
}
/**
 * Checks that {@link CSVFormat#RFC4180} reads a doubled quote inside a quoted field as a single quote
 * character and yields exactly one three-value record.
 *
 * @throws IOException if parsing fails.
 */
@Test
public void testCSV235() throws IOException {
    final String dqString = "\"aaa\",\"b\"\"bb\",\"ccc\""; // "aaa","b""bb","ccc"
    // try-with-resources closes the parser (and its reader) even when an assertion fails.
    try (final CSVParser parser = CSVFormat.RFC4180.parse(new StringReader(dqString))) {
        final Iterator<CSVRecord> records = parser.iterator();
        final CSVRecord record = records.next();
        assertFalse(records.hasNext());
        assertEquals(3, record.size());
        assertEquals("aaa", record.get(0));
        assertEquals("b\"bb", record.get(1));
        assertEquals("ccc", record.get(2));
    }
}
/**
 * Parses three single-value records joined by the given separator and checks the parser's current line number
 * before the first read, after each record, and after the end of input.
 *
 * @param lineSeparator the record separator used both in the input and in the format.
 * @throws IOException if parsing fails.
 */
private void validateLineNumbers(final String lineSeparator) throws IOException {
    final String csv = "a" + lineSeparator + "b" + lineSeparator + "c";
    final CSVFormat format = CSVFormat.DEFAULT.withRecordSeparator(lineSeparator);
    // The last expected value stays at 2 because the last line does not have EOL chars.
    final long[] expectedLineAfterRecord = { 1, 2, 2 };
    try (final CSVParser parser = CSVParser.parse(csv, format)) {
        assertEquals(0, parser.getCurrentLineNumber());
        for (final long expectedLine : expectedLineAfterRecord) {
            assertNotNull(parser.nextRecord());
            assertEquals(expectedLine, parser.getCurrentLineNumber());
        }
        assertNull(parser.nextRecord());
        // Still 2 after the end of input, for the same reason.
        assertEquals(2, parser.getCurrentLineNumber());
    }
}
/**
 * Parses three single-value records joined by the given separator and checks that the record number reported
 * by each record and by the parser counts up from 1 to 3 and stays at 3 after the end of input.
 *
 * @param lineSeparator the record separator used both in the input and in the format.
 * @throws IOException if parsing fails.
 */
private void validateRecordNumbers(final String lineSeparator) throws IOException {
    final String csv = "a" + lineSeparator + "b" + lineSeparator + "c";
    final CSVFormat format = CSVFormat.DEFAULT.withRecordSeparator(lineSeparator);
    try (final CSVParser parser = CSVParser.parse(csv, format)) {
        assertEquals(0, parser.getRecordNumber());
        for (long expectedNumber = 1; expectedNumber <= 3; expectedNumber++) {
            final CSVRecord current = parser.nextRecord();
            assertNotNull(current);
            assertEquals(expectedNumber, current.getRecordNumber());
            assertEquals(expectedNumber, parser.getRecordNumber());
        }
        // No fourth record; the counter keeps its last value.
        assertNull(parser.nextRecord());
        assertEquals(3, parser.getRecordNumber());
    }
}
/**
 * Checks the record number and character position reported for each of five records, then re-parses the
 * input starting at the character position of record 3 and checks that numbering and positions continue
 * from there.
 *
 * @param lineSeparator the record separator used in the input, inside quoted values, and in the format.
 * @throws IOException if parsing fails.
 */
private void validateRecordPosition(final String lineSeparator) throws IOException {
    final String nl = lineSeparator; // used as linebreak in values for better distinction
    final String code = "a,b,c" + lineSeparator + "1,2,3" + lineSeparator +
    // to see if recordPosition correctly points to the enclosing quote
            "'A" + nl + "A','B" + nl + "B',CC" + lineSeparator +
            // unicode test... not very relevant while operating on strings instead of bytes, but for
            // completeness...
            "\u00c4,\u00d6,\u00dc" + lineSeparator + "EOF,EOF,EOF";
    final CSVFormat format = CSVFormat.newFormat(',').withQuote('\'').withRecordSeparator(lineSeparator);
    // Assigned inside the first try block and reused to start the second parser.
    final long positionRecord3;
    // try-with-resources closes the parser even when an assertion fails part-way through.
    try (final CSVParser parser = CSVParser.parse(code, format)) {
        CSVRecord record;
        assertEquals(0, parser.getRecordNumber());
        assertNotNull(record = parser.nextRecord());
        assertEquals(1, record.getRecordNumber());
        assertEquals(code.indexOf('a'), record.getCharacterPosition());
        assertNotNull(record = parser.nextRecord());
        assertEquals(2, record.getRecordNumber());
        assertEquals(code.indexOf('1'), record.getCharacterPosition());
        assertNotNull(record = parser.nextRecord());
        positionRecord3 = record.getCharacterPosition();
        assertEquals(3, record.getRecordNumber());
        assertEquals(code.indexOf("'A"), record.getCharacterPosition());
        assertEquals("A" + lineSeparator + "A", record.get(0));
        assertEquals("B" + lineSeparator + "B", record.get(1));
        assertEquals("CC", record.get(2));
        assertNotNull(record = parser.nextRecord());
        assertEquals(4, record.getRecordNumber());
        assertEquals(code.indexOf('\u00c4'), record.getCharacterPosition());
        assertNotNull(record = parser.nextRecord());
        assertEquals(5, record.getRecordNumber());
        assertEquals(code.indexOf("EOF"), record.getCharacterPosition());
    }
    // now try to read starting at record 3
    try (final CSVParser parser = new CSVParser(new StringReader(code.substring((int) positionRecord3)), format,
            positionRecord3, 3)) {
        CSVRecord record;
        assertNotNull(record = parser.nextRecord());
        assertEquals(3, record.getRecordNumber());
        assertEquals(code.indexOf("'A"), record.getCharacterPosition());
        assertEquals("A" + lineSeparator + "A", record.get(0));
        assertEquals("B" + lineSeparator + "B", record.get(1));
        assertEquals("CC", record.get(2));
        assertNotNull(record = parser.nextRecord());
        assertEquals(4, record.getRecordNumber());
        assertEquals(code.indexOf('\u00c4'), record.getCharacterPosition());
        assertEquals("\u00c4", record.get(0));
    }
}
}