// Source: solr/core/src/test/org/apache/solr/internal/csv/CSVParserTest.java (lucene-solr, Git at Google)

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 package org.apache.solr.internal.csv;

 import java.io.IOException;
 import java.io.Reader;
 import java.io.StringReader;
 import java.util.Arrays;

 import junit.framework.TestCase;

 /**
  * CSVParserTest
  *
  * The tests are organized in three different sections:
  * The 'setter/getter' section, the lexer section and finally the parser
  * section. In case a test fails, you should follow a top-down approach for
  * fixing a potential bug (it's likely that the parser itself fails if the lexer
  * has problems...).
  */
 public class CSVParserTest extends TestCase {

   /**
    * TestCSVParser.
    */
   static class TestCSVParser extends CSVParser {

     /**
      * Builds a test parser that reads from the given reader.
      * @param in a Reader
      */
     TestCSVParser(Reader in) {
       super(in);
     }

     /**
      * Builds a test parser that reads from the given reader using the
      * supplied strategy.
      * @param in a Reader
      * @param strategy the strategy handed to the superclass constructor
      */
     TestCSVParser(Reader in, CSVStrategy strategy) {
       super(in, strategy);
     }

     /**
      * Fetches the next token from {@code super.nextToken()} and renders it
      * as {@code "<type>;<content>;"} so lexer tests can compare type and
      * content with one string assertion.
      * @return String representation of token type and content
      * @throws IOException like {@link CSVParser#nextToken()}
      */
     public String testNextToken() throws IOException {
       final Token token = super.nextToken();
       final String typeText = Integer.toString(token.type);
       return typeText + ";" + token.content + ";";
     }
   }

   // ======================================================
   //   lexer tests
   // ======================================================

   // Single line (without comment)
   public void testNextToken1() throws IOException {
     // One line, no comments; the expected values carry no surrounding blanks.
     final String input = "abc,def, hijk,  lmnop,   qrst,uv ,wxy   ,z , ,";
     final String[] expected = {
       CSVParser.TT_TOKEN + ";abc;",
       CSVParser.TT_TOKEN + ";def;",
       CSVParser.TT_TOKEN + ";hijk;",
       CSVParser.TT_TOKEN + ";lmnop;",
       CSVParser.TT_TOKEN + ";qrst;",
       CSVParser.TT_TOKEN + ";uv;",
       CSVParser.TT_TOKEN + ";wxy;",
       CSVParser.TT_TOKEN + ";z;",
       CSVParser.TT_TOKEN + ";;",
       CSVParser.TT_EOF + ";;"
     };
     final TestCSVParser lexer = new TestCSVParser(new StringReader(input));
     for (int i = 0; i < expected.length; i++) {
       assertEquals(expected[i], lexer.testNextToken());
     }
   }

   // multiline including comments (and empty lines)
   public void testNextToken2() throws IOException {
     /*
      * Input, line by line:
      *   1,2,3,
      *   a,b x,c
      *   #foo          (comment line, '#' is configured as comment start)
      *                 (empty line)
      *   d,e,
      *                 (empty line)
      */
     final String input = "1,2,3,\na,b x,c\n#foo\n\nd,e,\n\n";
     final CSVStrategy withComments = (CSVStrategy) CSVStrategy.DEFAULT_STRATEGY.clone();
     withComments.setCommentStart('#');

     final String[] expected = {
       CSVParser.TT_TOKEN + ";1;",
       CSVParser.TT_TOKEN + ";2;",
       CSVParser.TT_TOKEN + ";3;",
       CSVParser.TT_EORECORD + ";;",
       CSVParser.TT_TOKEN + ";a;",
       CSVParser.TT_TOKEN + ";b x;",
       CSVParser.TT_EORECORD + ";c;",
       CSVParser.TT_EORECORD + ";;",
       CSVParser.TT_TOKEN + ";d;",
       CSVParser.TT_TOKEN + ";e;",
       CSVParser.TT_EORECORD + ";;",
       CSVParser.TT_EOF + ";;",
       // reading once more at end of input must report EOF again
       CSVParser.TT_EOF + ";;"
     };

     final TestCSVParser lexer = new TestCSVParser(new StringReader(input), withComments);
     for (int i = 0; i < expected.length; i++) {
       assertEquals(expected[i], lexer.testNextToken());
     }
   }

   // simple token with escaping
   public void testNextToken3() throws IOException {
     /*
      * Input, line by line:
      *   a,\,,b
      *   \,,
      */
     final String input = "a,\\,,b\n\\,,";
     final CSVStrategy withComments = (CSVStrategy) CSVStrategy.DEFAULT_STRATEGY.clone();
     withComments.setCommentStart('#');

     // An unquoted single backslash is expected to come back verbatim,
     // i.e. it is not treated as an escape character.
     final String[] expected = {
       CSVParser.TT_TOKEN + ";a;",
       CSVParser.TT_TOKEN + ";\\;",
       CSVParser.TT_TOKEN + ";;",
       CSVParser.TT_EORECORD + ";b;",
       CSVParser.TT_TOKEN + ";\\;",
       CSVParser.TT_TOKEN + ";;",
       CSVParser.TT_EOF + ";;"
     };

     final TestCSVParser lexer = new TestCSVParser(new StringReader(input), withComments);
     for (int i = 0; i < expected.length; i++) {
       assertEquals(expected[i], lexer.testNextToken());
     }
   }

   // encapsulator tokenizer (single line)
   public void testNextToken4() throws IOException {
     /*
      * Input, line by line:
      *   a,"foo",b
      *   a,   " foo",b
      *   a,"foo "  ,b       (whitespace after the closing encapsulator)
      *   a,  " foo "  ,b
      */
     final String input =
       "a,\"foo\",b\na,   \" foo\",b\na,\"foo \"  ,b\na,  \" foo \"  ,b";

     // Blanks inside the quotes are expected to survive; blanks outside are not.
     final String[] expected = {
       CSVParser.TT_TOKEN + ";a;",
       CSVParser.TT_TOKEN + ";foo;",
       CSVParser.TT_EORECORD + ";b;",
       CSVParser.TT_TOKEN + ";a;",
       CSVParser.TT_TOKEN + "; foo;",
       CSVParser.TT_EORECORD + ";b;",
       CSVParser.TT_TOKEN + ";a;",
       CSVParser.TT_TOKEN + ";foo ;",
       CSVParser.TT_EORECORD + ";b;",
       CSVParser.TT_TOKEN + ";a;",
       CSVParser.TT_TOKEN + "; foo ;",
       // the input has no trailing newline, so the last value arrives as EOF
       CSVParser.TT_EOF + ";b;"
     };

     final TestCSVParser lexer = new TestCSVParser(new StringReader(input));
     for (int i = 0; i < expected.length; i++) {
       assertEquals(expected[i], lexer.testNextToken());
     }
   }

   // encapsulator tokenizer (multi line, delimiter in string)
   public void testNextToken5() throws IOException {
     // Quoted values that contain newlines, delimiters and tabs.
     final String input =
       "a,\"foo\n\",b\n\"foo\n  baar ,,,\"\n\"\n\t \n\"";

     final String[] expected = {
       CSVParser.TT_TOKEN + ";a;",
       CSVParser.TT_TOKEN + ";foo\n;",
       CSVParser.TT_EORECORD + ";b;",
       CSVParser.TT_EORECORD + ";foo\n  baar ,,,;",
       CSVParser.TT_EOF + ";\n\t \n;"
     };

     final TestCSVParser lexer = new TestCSVParser(new StringReader(input));
     for (int i = 0; i < expected.length; i++) {
       assertEquals(expected[i], lexer.testNextToken());
     }
   }

   // change delimiters, comment, encapsulator
   public void testNextToken6() throws IOException {
     /*
      * Input, line by line (the quote inside the value is doubled):
      *   a;'b and '' more
      *   '
      *   !comment;;;;
      *   ;;
      *
      * Only the first two tokens are checked.
      */
     final String input = "a;'b and '' more\n'\n!comment;;;;\n;;";
     final CSVStrategy custom = new CSVStrategy(';', '\'', '!');
     final TestCSVParser lexer = new TestCSVParser(new StringReader(input), custom);

     final String first = lexer.testNextToken();
     assertEquals(CSVParser.TT_TOKEN + ";a;", first);

     final String second = lexer.testNextToken();
     assertEquals(CSVParser.TT_EORECORD + ";b and ' more\n;", second);
   }


   // ======================================================
   //   parser tests
   // ======================================================

   // Shared fixture for testGetLine, testNextValue and testGetAllValues:
   // `code` is the CSV input and `res` holds the records those tests expect,
   // one row per record. The last record is a quoted value that spans
   // several lines and contains doubled quotes.
   String code =
     "a,b,c,d\n"
     + " a , b , 1 2 \n"
     + "\"foo baar\", b,\n"
    // previous variant of the last line, using a backslash before the quote:
    // + "   \"foo\n,,\n\"\",,\n\\\"\",d,e\n";
       + "   \"foo\n,,\n\"\",,\n\"\"\",d,e\n";   // changed to use standard CSV escaping
   String[][] res = {
     {"a", "b", "c", "d"},
     {"a", "b", "1 2"},
     {"foo baar", "b", ""},
     {"foo\n,,\n\",,\n\"", "d", "e"}
   };
   /**
    * Reads the shared fixture record by record with {@code getLine()} and
    * expects {@code null} once every record in {@code res} has been returned.
    */
   public void testGetLine() throws IOException {
     final CSVParser parser = new CSVParser(new StringReader(code));
     int row = 0;
     while (row < res.length) {
       final String[] line = parser.getLine();
       assertTrue(Arrays.equals(res[row], line));
       row++;
     }
     final String[] afterLast = parser.getLine();
     assertTrue(afterLast == null);
   }

   /**
    * Reads the shared fixture value by value with {@code nextValue()}, in
    * row-major order of {@code res}, and expects {@code null} afterwards.
    */
   public void testNextValue() throws IOException {
     final CSVParser parser = new CSVParser(new StringReader(code));
     for (int row = 0; row < res.length; row++) {
       final String[] expectedRow = res[row];
       for (int col = 0; col < expectedRow.length; col++) {
         final String value = parser.nextValue();
         assertEquals(expectedRow[col], value);
       }
     }
     final String afterLast = parser.nextValue();
     assertTrue(afterLast == null);
   }

   /**
    * Parses the shared fixture in one call to {@code getAllValues()} and
    * compares record count and every record against {@code res}.
    */
   public void testGetAllValues() throws IOException {
     final String[][] parsed = new CSVParser(new StringReader(code)).getAllValues();
     assertEquals(res.length, parsed.length);
     assertTrue(parsed.length > 0);
     for (int row = 0; row < res.length; row++) {
       final boolean sameRow = Arrays.equals(res[row], parsed[row]);
       assertTrue(sameRow);
     }
   }

   /**
    * Parses CRLF-terminated input with {@code CSVStrategy.EXCEL_STRATEGY}.
    * The expected records keep leading blanks ("  x"), include a record for
    * the empty line, and un-double the quotes inside quoted values.
    */
   public void testExcelStrategy1() throws IOException {
     final String input =
       "value1,value2,value3,value4\r\na,b,c,d\r\n  x,,,"
       + "\r\n\r\n\"\"\"hello\"\"\",\"  \"\"world\"\"\",\"abc\ndef\",\r\n";
     final String[][] expected = {
       {"value1", "value2", "value3", "value4"},
       {"a", "b", "c", "d"},
       {"  x", "", "", ""},
       {""},
       {"\"hello\"", "  \"world\"", "abc\ndef", ""}
     };
     final CSVParser parser =
       new CSVParser(new StringReader(input), CSVStrategy.EXCEL_STRATEGY);
     final String[][] parsed = parser.getAllValues();
     assertEquals(expected.length, parsed.length);
     assertTrue(parsed.length > 0);
     for (int row = 0; row < expected.length; row++) {
       assertTrue(Arrays.equals(expected[row], parsed[row]));
     }
   }

   /**
    * Parses input with empty lines between records using
    * {@code CSVStrategy.EXCEL_STRATEGY}; each empty line is expected to
    * show up as a record holding one empty string.
    */
   public void testExcelStrategy2() throws Exception {
     final String input = "foo,baar\r\n\r\nhello,\r\n\r\nworld,\r\n";
     final String[][] expected = {
       {"foo", "baar"},
       {""},
       {"hello", ""},
       {""},
       {"world", ""}
     };
     final CSVParser parser =
       new CSVParser(new StringReader(input), CSVStrategy.EXCEL_STRATEGY);
     final String[][] parsed = parser.getAllValues();
     assertEquals(expected.length, parsed.length);
     assertTrue(parsed.length > 0);
     for (int row = 0; row < expected.length; row++) {
       assertTrue(Arrays.equals(expected[row], parsed[row]));
     }
   }

   /**
    * Feeds several inputs that differ only in how the last record ends
    * (CRLF, LF, nothing, with or without an empty quoted value) through
    * {@code CSVStrategy.EXCEL_STRATEGY} and expects the same three records
    * for every one of them.
    */
   public void testEndOfFileBehaviourExcel() throws Exception {
     final String[] inputs = {
         "hello,\r\n\r\nworld,\r\n",
         "hello,\r\n\r\nworld,",
         "hello,\r\n\r\nworld,\"\"\r\n",
         "hello,\r\n\r\nworld,\"\"",
         "hello,\r\n\r\nworld,\n",
         "hello,\r\n\r\nworld,",
         "hello,\r\n\r\nworld,\"\"\n",
         "hello,\r\n\r\nworld,\"\""
         };
     final String[][] expected = {
       {"hello", ""},
       {""},  // ExcelStrategy does not ignore empty lines
       {"world", ""}
     };
     for (int n = 0; n < inputs.length; n++) {
       final StringReader reader = new StringReader(inputs[n]);
       final String[][] parsed =
         new CSVParser(reader, CSVStrategy.EXCEL_STRATEGY).getAllValues();
       assertEquals(expected.length, parsed.length);
       assertTrue(parsed.length > 0);
       for (int row = 0; row < expected.length; row++) {
         assertTrue(Arrays.equals(expected[row], parsed[row]));
       }
     }
   }

   /**
    * Same inputs as the Excel end-of-file test, parsed with the
    * single-argument {@code CSVParser} constructor; the empty line is
    * expected to be dropped, leaving two records.
    */
   public void testEndOfFileBehaviorCSV() throws Exception {
     final String[] inputs = {
         "hello,\r\n\r\nworld,\r\n",
         "hello,\r\n\r\nworld,",
         "hello,\r\n\r\nworld,\"\"\r\n",
         "hello,\r\n\r\nworld,\"\"",
         "hello,\r\n\r\nworld,\n",
         "hello,\r\n\r\nworld,",
         "hello,\r\n\r\nworld,\"\"\n",
         "hello,\r\n\r\nworld,\"\""
         };
     final String[][] expected = {
       {"hello", ""},  // CSV Strategy ignores empty lines
       {"world", ""}
     };
     for (int n = 0; n < inputs.length; n++) {
       final StringReader reader = new StringReader(inputs[n]);
       final String[][] parsed = new CSVParser(reader).getAllValues();
       assertEquals(expected.length, parsed.length);
       assertTrue(parsed.length > 0);
       for (int row = 0; row < expected.length; row++) {
         assertTrue(Arrays.equals(expected[row], parsed[row]));
       }
     }
   }

   /**
    * Parses inputs that end in two empty lines with
    * {@code CSVStrategy.EXCEL_STRATEGY}; each empty line is expected to
    * produce its own single-empty-string record.
    */
   public void testEmptyLineBehaviourExcel() throws Exception {
     final String[] inputs = {
         "hello,\r\n\r\n\r\n",
         "hello,\n\n\n",
         "hello,\"\"\r\n\r\n\r\n",
         "hello,\"\"\n\n\n"
         };
     final String[][] expected = {
       {"hello", ""},
       {""},  // ExcelStrategy does not ignore empty lines
       {""}
     };
     for (int n = 0; n < inputs.length; n++) {
       final StringReader reader = new StringReader(inputs[n]);
       final String[][] parsed =
         new CSVParser(reader, CSVStrategy.EXCEL_STRATEGY).getAllValues();
       assertEquals(expected.length, parsed.length);
       assertTrue(parsed.length > 0);
       for (int row = 0; row < expected.length; row++) {
         assertTrue(Arrays.equals(expected[row], parsed[row]));
       }
     }
   }

   /**
    * Parses inputs that end in two empty lines with the single-argument
    * {@code CSVParser} constructor; only the one non-empty record is expected.
    */
   public void testEmptyLineBehaviourCSV() throws Exception {
     final String[] inputs = {
         "hello,\r\n\r\n\r\n",
         "hello,\n\n\n",
         "hello,\"\"\r\n\r\n\r\n",
         "hello,\"\"\n\n\n"
         };
     final String[][] expected = {
       {"hello", ""}  // CSV Strategy ignores empty lines
     };
     for (int n = 0; n < inputs.length; n++) {
       final StringReader reader = new StringReader(inputs[n]);
       final String[][] parsed = new CSVParser(reader).getAllValues();
       assertEquals(expected.length, parsed.length);
       assertTrue(parsed.length > 0);
       for (int row = 0; row < expected.length; row++) {
         assertTrue(Arrays.equals(expected[row], parsed[row]));
       }
     }
   }

   /**
    * Older backslash-escaping scenario, parsed with the single-argument
    * {@code CSVParser} constructor.
    * NOTE(review): the method name does not start with "test", so a
    * name-based JUnit 3 runner would presumably not pick it up - confirm
    * whether it is meant to stay disabled.
    */
   public void OLDtestBackslashEscaping() throws IOException {
     final String input =
       "one,two,three\n"
       + "on\\\"e,two\n"
       + "on\"e,two\n"
       + "one,\"tw\\\"o\"\n"
       + "one,\"t\\,wo\"\n"
       + "one,two,\"th,ree\"\n"
       + "\"a\\\\\"\n"
       + "a\\,b\n"
       + "\"a\\\\,b\"";
     final String[][] expected = {
         { "one", "two", "three" },
         { "on\\\"e", "two" },
         { "on\"e", "two" },
         { "one", "tw\"o" },
         { "one", "t\\,wo" },  // backslash in quotes only escapes a delimiter (",")
         { "one", "two", "th,ree" },
         { "a\\\\" },     // backslash in quotes only escapes a delimiter (",")
         { "a\\", "b" },  // a backslash must be returned
         { "a\\\\,b" }    // backslash in quotes only escapes a delimiter (",")
       };
     final String[][] parsed = new CSVParser(new StringReader(input)).getAllValues();
     assertEquals(expected.length, parsed.length);
     assertTrue(parsed.length > 0);
     for (int row = 0; row < expected.length; row++) {
       assertTrue(Arrays.equals(expected[row], parsed[row]));
     }
   }

   /**
    * Checks escape handling with a custom strategy: ',' as delimiter, a
    * single quote as encapsulator, comments disabled and '/' as the escape
    * character.
    *
    * Every expected record must be present at its index in the parser
    * output. Before comparing, the test asserts that the parser returned at
    * least as many records as expected, so a short result fails as an
    * assertion rather than as an ArrayIndexOutOfBoundsException, and each
    * row comparison names the row and both values.
    *
    * @throws IOException if the parser fails to read the input
    */
   public void testBackslashEscaping() throws IOException {

     // To avoid confusion over the need for escaping chars in java code,
     // We will test with a forward slash as the escape char, and a single
     // quote as the encapsulator.

     String code =
       "one,two,three\n" // 0
       + "'',''\n"       // 1) empty encapsulators
       + "/',/'\n"       // 2) single encapsulators
       + "'/'','/''\n"   // 3) single encapsulators encapsulated via escape
       + "'''',''''\n"   // 4) single encapsulators encapsulated via doubling
       + "/,,/,\n"       // 5) separator escaped
       + "//,//\n"       // 6) escape escaped
       + "'//','//'\n"   // 7) escape escaped in encapsulation
       + "   8   ,   \"quoted \"\" /\" // string\"   \n"     // 8) don't eat spaces
       + "9,   /\n   \n"  // 9) escaped newline
       + "";
     String[][] res = {
         { "one", "two", "three" }, // 0
         { "", "" },                // 1
         { "'", "'" },              // 2
         { "'", "'" },              // 3
         { "'", "'" },              // 4
         { ",", "," },              // 5
         { "/", "/" },              // 6
         { "/", "/" },              // 7
         { "   8   ", "   \"quoted \"\" \" / string\"   " }, // 8
         { "9", "   \n   " },       // 9
       };


     CSVStrategy strategy = new CSVStrategy(',','\'',CSVStrategy.COMMENTS_DISABLED,'/',false,false,true,true,"\n");

     CSVParser parser = new CSVParser(new StringReader(code), strategy);
     String[][] tmp = parser.getAllValues();
     assertTrue(tmp.length > 0);
     // Guard the indexed access below: fail with a readable message when
     // the parser produced fewer records than we are about to compare.
     assertTrue("expected at least " + res.length + " records but got " + tmp.length,
         tmp.length >= res.length);
     for (int i = 0; i < res.length; i++) {
       assertTrue("record " + i + ": expected " + Arrays.toString(res[i])
           + " but was " + Arrays.toString(tmp[i]),
           Arrays.equals(res[i], tmp[i]));
     }
   }

   /**
    * Escape handling with the encapsulator disabled: ',' as delimiter,
    * comments disabled, '/' as the escape character. The expected values
    * keep their surrounding blanks and tabs.
    */
   public void testBackslashEscaping2() throws IOException {

     // A forward slash serves as the escape char so the Java source does
     // not need doubled backslashes.
     final String input = ""
       + " , , \n"           // 1)
       + " \t ,  , \n"       // 2)
       + " // , /, , /,\n"   // 3)
       + "";
     final String[][] expected = {
         { " ", " ", " " },         // 1
         { " \t ", "  ", " " },     // 2
         { " / ", " , ", " ," },    // 3
       };

     final CSVStrategy noEncapsulator = new CSVStrategy
         (',', CSVStrategy.ENCAPSULATOR_DISABLED, CSVStrategy.COMMENTS_DISABLED, '/', false, false, true, true, "\n");

     final String[][] parsed =
       new CSVParser(new StringReader(input), noEncapsulator).getAllValues();
     assertTrue(parsed.length > 0);

     assertTrue(CSVPrinterTest.equals(expected, parsed));
   }


   /**
    * Parses the same input twice: first with
    * {@code CSVStrategy.DEFAULT_STRATEGY}, whose comment start must equal
    * {@code COMMENTS_DISABLED}, so the '#' is expected as a value; then with
    * a strategy that uses '#' as comment start, where the last record is
    * expected to hold only the empty string.
    */
   public void testDefaultStrategy() throws IOException {

     final String input = ""
         + "a,b\n"            // 1)
         + "\"\n\",\" \"\n"   // 2)
         + "\"\",#\n"         // 3)
         ;

     // --- pass 1: comments disabled ---
     final String[][] expectedPlain = {
         { "a", "b" },
         { "\n", " " },
         { "", "#" },
     };
     CSVStrategy strategy = CSVStrategy.DEFAULT_STRATEGY;
     assertEquals(CSVStrategy.COMMENTS_DISABLED, strategy.getCommentStart());

     CSVParser parser = new CSVParser(new StringReader(input), strategy);
     String[][] parsed = parser.getAllValues();
     assertTrue(parsed.length > 0);
     assertTrue(CSVPrinterTest.equals(expectedPlain, parsed));

     // --- pass 2: '#' starts a comment ---
     final String[][] expectedWithComments = {
         { "a", "b" },
         { "\n", " " },
         { ""},
     };
     strategy = new CSVStrategy(',','"','#');
     parser = new CSVParser(new StringReader(input), strategy);
     parsed = parser.getAllValues();
     assertTrue(CSVPrinterTest.equals(expectedWithComments, parsed));
   }


     /**
      * With unicode-escape interpretation switched on, the literal
      * backslash-u sequences in the second value are expected to be
      * decoded to the word "public".
      */
     public void testUnicodeEscape() throws IOException {
       final String input = "abc,\\u0070\\u0075\\u0062\\u006C\\u0069\\u0063";
       final CSVStrategy unicodeAware = (CSVStrategy) CSVStrategy.DEFAULT_STRATEGY.clone();
       unicodeAware.setUnicodeEscapeInterpretation(true);

       final String[] line = new CSVParser(new StringReader(input), unicodeAware).getLine();
       assertEquals(2, line.length);
       assertEquals("abc", line[0]);
       assertEquals("public", line[1]);
     }

     /**
      * Four CRLF-separated lines (the last one unterminated) are expected
      * to parse into four records.
      */
     public void testCarriageReturnLineFeedEndings() throws IOException {
       final String input = "foo\r\nbaar,\r\nhello,world\r\n,kanu";
       final String[][] records = new CSVParser(new StringReader(input)).getAllValues();
       assertEquals(4, records.length);
     }

     /**
      * Input with empty lines at the start, in the middle and at the end,
      * using both LF and CRLF; three records are expected from the
      * single-argument parser.
      */
     public void testIgnoreEmptyLines() throws IOException {
       final String input = "\nfoo,baar\n\r\n,\n\n,world\r\n\n";
       final String[][] records = new CSVParser(new StringReader(input)).getAllValues();
       assertEquals(3, records.length);
     }

     /**
      * Verifies that the three read APIs agree on the same input: the
      * records from {@code getAllValues()} are used as the reference and
      * compared against {@code getLine()} (record by record) and
      * {@code nextValue()} (value by value), each on its own fresh parser.
      *
      * The reference value is passed as the expected argument so failure
      * messages read the right way round, and the reference must be
      * non-empty so the loops cannot pass without checking anything.
      *
      * @throws IOException if a parser fails to read the input
      */
     public void testLineTokenConsistency() throws IOException {
       String code = "\nfoo,baar\n\r\n,\n\n,world\r\n\n";
       CSVParser parser = new CSVParser(new StringReader(code));
       String[][] data = parser.getAllValues();
       assertTrue("expected at least one record", data.length > 0);
       // fresh parsers: the one above has already consumed its reader
       parser = new CSVParser(new StringReader(code));
       CSVParser parser1 = new CSVParser(new StringReader(code));
       for (int i = 0; i < data.length; i++) {
         assertTrue("getLine() differs from getAllValues() at record " + i,
             Arrays.equals(data[i], parser1.getLine()));
         for (int j = 0; j < data[i].length; j++) {
           assertEquals("nextValue() differs at record " + i + ", value " + j,
               data[i][j], parser.nextValue());
         }
       }
     }

     // From SANDBOX-153
      /**
       * Lexes tab-separated input with {@code CSVStrategy.TDF_STRATEGY}.
       * Two adjacent tabs are expected to yield an empty token, and the
       * blanks around "four", "five" and "six" are expected to be dropped.
       */
      public void testDelimiterIsWhitespace() throws IOException {
          final String input = "one\ttwo\t\tfour \t five\t six";
          final String[] expected = {
              CSVParser.TT_TOKEN + ";one;",
              CSVParser.TT_TOKEN + ";two;",
              CSVParser.TT_TOKEN + ";;",
              CSVParser.TT_TOKEN + ";four;",
              CSVParser.TT_TOKEN + ";five;",
              CSVParser.TT_EOF + ";six;"
          };
          final TestCSVParser lexer =
              new TestCSVParser(new StringReader(input), CSVStrategy.TDF_STRATEGY);
          for (int i = 0; i < expected.length; i++) {
              assertEquals(expected[i], lexer.testNextToken());
          }
      }
 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/
	package org.apache.solr.internal.csv;

	import java.io.IOException;
	import java.io.Reader;
	import java.io.StringReader;
	import java.util.Arrays;

	import junit.framework.TestCase;

	/**
	* CSVParserTest
	*
	* The tests are organized in three different sections:
	* The 'setter/getter' section, the lexer section and finally the parser
	* section. In case a test fails, you should follow a top-down approach for
	* fixing a potential bug (it's likely that the parser itself fails if the lexer
	* has problems...).
	*/
	public class CSVParserTest extends TestCase {

	/**
	* TestCSVParser.
	*/
	static class TestCSVParser extends CSVParser {

	  /**
	   * Builds a test parser that reads from the given reader.
	   * @param in a Reader
	   */
	  TestCSVParser(Reader in) {
	    super(in);
	  }

	  /**
	   * Builds a test parser that reads from the given reader using the
	   * supplied strategy.
	   * @param in a Reader
	   * @param strategy the strategy handed to the superclass constructor
	   */
	  TestCSVParser(Reader in, CSVStrategy strategy) {
	    super(in, strategy);
	  }

	  /**
	   * Fetches the next token from {@code super.nextToken()} and renders it
	   * as {@code "<type>;<content>;"}.
	   * @return String representation of token type and content
	   * @throws IOException like {@link CSVParser#nextToken()}
	   */
	  public String testNextToken() throws IOException {
	    final Token token = super.nextToken();
	    final String typeText = Integer.toString(token.type);
	    return typeText + ";" + token.content + ";";
	  }
	}

	// ======================================================
	// lexer tests
	// ======================================================

	// Single line (without comment)
	public void testNextToken1() throws IOException {
	  // One line, no comments; the expected values carry no surrounding blanks.
	  final String input = "abc,def, hijk, lmnop, qrst,uv ,wxy ,z , ,";
	  final String[] expected = {
	    CSVParser.TT_TOKEN + ";abc;",
	    CSVParser.TT_TOKEN + ";def;",
	    CSVParser.TT_TOKEN + ";hijk;",
	    CSVParser.TT_TOKEN + ";lmnop;",
	    CSVParser.TT_TOKEN + ";qrst;",
	    CSVParser.TT_TOKEN + ";uv;",
	    CSVParser.TT_TOKEN + ";wxy;",
	    CSVParser.TT_TOKEN + ";z;",
	    CSVParser.TT_TOKEN + ";;",
	    CSVParser.TT_EOF + ";;"
	  };
	  final TestCSVParser lexer = new TestCSVParser(new StringReader(input));
	  for (int i = 0; i < expected.length; i++) {
	    assertEquals(expected[i], lexer.testNextToken());
	  }
	}

	// multiline including comments (and empty lines)
	public void testNextToken2() throws IOException {
	  /*
	   * Input, line by line:
	   *   1,2,3,
	   *   a,b x,c
	   *   #foo          (comment line, '#' is configured as comment start)
	   *                 (empty line)
	   *   d,e,
	   *                 (empty line)
	   */
	  final String input = "1,2,3,\na,b x,c\n#foo\n\nd,e,\n\n";
	  final CSVStrategy withComments = (CSVStrategy) CSVStrategy.DEFAULT_STRATEGY.clone();
	  withComments.setCommentStart('#');

	  final String[] expected = {
	    CSVParser.TT_TOKEN + ";1;",
	    CSVParser.TT_TOKEN + ";2;",
	    CSVParser.TT_TOKEN + ";3;",
	    CSVParser.TT_EORECORD + ";;",
	    CSVParser.TT_TOKEN + ";a;",
	    CSVParser.TT_TOKEN + ";b x;",
	    CSVParser.TT_EORECORD + ";c;",
	    CSVParser.TT_EORECORD + ";;",
	    CSVParser.TT_TOKEN + ";d;",
	    CSVParser.TT_TOKEN + ";e;",
	    CSVParser.TT_EORECORD + ";;",
	    CSVParser.TT_EOF + ";;",
	    // reading once more at end of input must report EOF again
	    CSVParser.TT_EOF + ";;"
	  };

	  final TestCSVParser lexer = new TestCSVParser(new StringReader(input), withComments);
	  for (int i = 0; i < expected.length; i++) {
	    assertEquals(expected[i], lexer.testNextToken());
	  }
	}

	// simple token with escaping
	public void testNextToken3() throws IOException {
	  /*
	   * Input, line by line:
	   *   a,\,,b
	   *   \,,
	   */
	  final String input = "a,\\,,b\n\\,,";
	  final CSVStrategy withComments = (CSVStrategy) CSVStrategy.DEFAULT_STRATEGY.clone();
	  withComments.setCommentStart('#');

	  // An unquoted single backslash is expected to come back verbatim,
	  // i.e. it is not treated as an escape character.
	  final String[] expected = {
	    CSVParser.TT_TOKEN + ";a;",
	    CSVParser.TT_TOKEN + ";\\;",
	    CSVParser.TT_TOKEN + ";;",
	    CSVParser.TT_EORECORD + ";b;",
	    CSVParser.TT_TOKEN + ";\\;",
	    CSVParser.TT_TOKEN + ";;",
	    CSVParser.TT_EOF + ";;"
	  };

	  final TestCSVParser lexer = new TestCSVParser(new StringReader(input), withComments);
	  for (int i = 0; i < expected.length; i++) {
	    assertEquals(expected[i], lexer.testNextToken());
	  }
	}

	// encapsulator tokenizer (single line)
	public void testNextToken4() throws IOException {
	  /*
	   * Input, line by line:
	   *   a,"foo",b
	   *   a, " foo",b
	   *   a,"foo " ,b       (whitespace after the closing encapsulator)
	   *   a, " foo " ,b
	   */
	  final String input =
	    "a,\"foo\",b\na, \" foo\",b\na,\"foo \" ,b\na, \" foo \" ,b";

	  // Blanks inside the quotes are expected to survive; blanks outside are not.
	  final String[] expected = {
	    CSVParser.TT_TOKEN + ";a;",
	    CSVParser.TT_TOKEN + ";foo;",
	    CSVParser.TT_EORECORD + ";b;",
	    CSVParser.TT_TOKEN + ";a;",
	    CSVParser.TT_TOKEN + "; foo;",
	    CSVParser.TT_EORECORD + ";b;",
	    CSVParser.TT_TOKEN + ";a;",
	    CSVParser.TT_TOKEN + ";foo ;",
	    CSVParser.TT_EORECORD + ";b;",
	    CSVParser.TT_TOKEN + ";a;",
	    CSVParser.TT_TOKEN + "; foo ;",
	    // the input has no trailing newline, so the last value arrives as EOF
	    CSVParser.TT_EOF + ";b;"
	  };

	  final TestCSVParser lexer = new TestCSVParser(new StringReader(input));
	  for (int i = 0; i < expected.length; i++) {
	    assertEquals(expected[i], lexer.testNextToken());
	  }
	}

	// encapsulator tokenizer (multi line, delimiter in string)
	public void testNextToken5() throws IOException {
	  // Quoted values that contain newlines, delimiters and tabs.
	  final String input =
	    "a,\"foo\n\",b\n\"foo\n baar ,,,\"\n\"\n\t \n\"";

	  final String[] expected = {
	    CSVParser.TT_TOKEN + ";a;",
	    CSVParser.TT_TOKEN + ";foo\n;",
	    CSVParser.TT_EORECORD + ";b;",
	    CSVParser.TT_EORECORD + ";foo\n baar ,,,;",
	    CSVParser.TT_EOF + ";\n\t \n;"
	  };

	  final TestCSVParser lexer = new TestCSVParser(new StringReader(input));
	  for (int i = 0; i < expected.length; i++) {
	    assertEquals(expected[i], lexer.testNextToken());
	  }
	}

	// change delimiters, comment, encapsulator
	public void testNextToken6() throws IOException {
	  /*
	   * Input, line by line (the quote inside the value is doubled):
	   *   a;'b and '' more
	   *   '
	   *   !comment;;;;
	   *   ;;
	   *
	   * Only the first two tokens are checked.
	   */
	  final String input = "a;'b and '' more\n'\n!comment;;;;\n;;";
	  final CSVStrategy custom = new CSVStrategy(';', '\'', '!');
	  final TestCSVParser lexer = new TestCSVParser(new StringReader(input), custom);

	  final String first = lexer.testNextToken();
	  assertEquals(CSVParser.TT_TOKEN + ";a;", first);

	  final String second = lexer.testNextToken();
	  assertEquals(CSVParser.TT_EORECORD + ";b and ' more\n;", second);
	}


	// ======================================================
	// parser tests
	// ======================================================

	// Shared fixture for testGetLine, testNextValue and testGetAllValues:
	// `code` is the CSV input and `res` holds the records those tests expect,
	// one row per record. The last record is a quoted value that spans
	// several lines and contains doubled quotes.
	String code =
	"a,b,c,d\n"
	+ " a , b , 1 2 \n"
	+ "\"foo baar\", b,\n"
	// previous variant of the last line, using a backslash before the quote:
	// + " \"foo\n,,\n\"\",,\n\\\"\",d,e\n";
	+ " \"foo\n,,\n\"\",,\n\"\"\",d,e\n"; // changed to use standard CSV escaping
	String[][] res = {
	{"a", "b", "c", "d"},
	{"a", "b", "1 2"},
	{"foo baar", "b", ""},
	{"foo\n,,\n\",,\n\"", "d", "e"}
	};
	/**
	 * Reads the shared fixture record by record with {@code getLine()} and
	 * expects {@code null} once every record in {@code res} has been returned.
	 */
	public void testGetLine() throws IOException {
	  final CSVParser parser = new CSVParser(new StringReader(code));
	  int row = 0;
	  while (row < res.length) {
	    final String[] line = parser.getLine();
	    assertTrue(Arrays.equals(res[row], line));
	    row++;
	  }
	  final String[] afterLast = parser.getLine();
	  assertTrue(afterLast == null);
	}

	/**
	 * Reads the shared fixture value by value with {@code nextValue()}, in
	 * row-major order of {@code res}, and expects {@code null} afterwards.
	 */
	public void testNextValue() throws IOException {
	  final CSVParser parser = new CSVParser(new StringReader(code));
	  for (int row = 0; row < res.length; row++) {
	    final String[] expectedRow = res[row];
	    for (int col = 0; col < expectedRow.length; col++) {
	      final String value = parser.nextValue();
	      assertEquals(expectedRow[col], value);
	    }
	  }
	  final String afterLast = parser.nextValue();
	  assertTrue(afterLast == null);
	}

	/**
	 * Parses the shared fixture in one call to {@code getAllValues()} and
	 * compares record count and every record against {@code res}.
	 */
	public void testGetAllValues() throws IOException {
	  final String[][] parsed = new CSVParser(new StringReader(code)).getAllValues();
	  assertEquals(res.length, parsed.length);
	  assertTrue(parsed.length > 0);
	  for (int row = 0; row < res.length; row++) {
	    final boolean sameRow = Arrays.equals(res[row], parsed[row]);
	    assertTrue(sameRow);
	  }
	}

	/**
	 * Parses CRLF-terminated input with {@code CSVStrategy.EXCEL_STRATEGY}.
	 * The expected records keep the leading blank (" x"), include a record
	 * for the empty line, and un-double the quotes inside quoted values.
	 */
	public void testExcelStrategy1() throws IOException {
	  final String input =
	    "value1,value2,value3,value4\r\na,b,c,d\r\n x,,,"
	    + "\r\n\r\n\"\"\"hello\"\"\",\" \"\"world\"\"\",\"abc\ndef\",\r\n";
	  final String[][] expected = {
	    {"value1", "value2", "value3", "value4"},
	    {"a", "b", "c", "d"},
	    {" x", "", "", ""},
	    {""},
	    {"\"hello\"", " \"world\"", "abc\ndef", ""}
	  };
	  final CSVParser parser =
	    new CSVParser(new StringReader(input), CSVStrategy.EXCEL_STRATEGY);
	  final String[][] parsed = parser.getAllValues();
	  assertEquals(expected.length, parsed.length);
	  assertTrue(parsed.length > 0);
	  for (int row = 0; row < expected.length; row++) {
	    assertTrue(Arrays.equals(expected[row], parsed[row]));
	  }
	}

	/**
	 * Parses input with empty lines between records using
	 * {@code CSVStrategy.EXCEL_STRATEGY}; each empty line is expected to
	 * show up as a record holding one empty string.
	 */
	public void testExcelStrategy2() throws Exception {
	  final String input = "foo,baar\r\n\r\nhello,\r\n\r\nworld,\r\n";
	  final String[][] expected = {
	    {"foo", "baar"},
	    {""},
	    {"hello", ""},
	    {""},
	    {"world", ""}
	  };
	  final CSVParser parser =
	    new CSVParser(new StringReader(input), CSVStrategy.EXCEL_STRATEGY);
	  final String[][] parsed = parser.getAllValues();
	  assertEquals(expected.length, parsed.length);
	  assertTrue(parsed.length > 0);
	  for (int row = 0; row < expected.length; row++) {
	    assertTrue(Arrays.equals(expected[row], parsed[row]));
	  }
	}

	/**
	 * Checks that {@code CSVStrategy.EXCEL_STRATEGY} produces the same three
	 * rows however the last record is terminated: CRLF, LF or nothing, with the
	 * final value either missing or written as an empty quoted string.
	 */
	public void testEndOfFileBehaviourExcel() throws Exception {
	  // Inputs 0-3 cover the CRLF ending, inputs 4-7 the LF ending. The variants
	  // without a trailing line break (5 and 7) repeat inputs 1 and 3.
	  final String[] inputs = {
	    "hello,\r\n\r\nworld,\r\n",
	    "hello,\r\n\r\nworld,",
	    "hello,\r\n\r\nworld,\"\"\r\n",
	    "hello,\r\n\r\nworld,\"\"",
	    "hello,\r\n\r\nworld,\n",
	    "hello,\r\n\r\nworld,",
	    "hello,\r\n\r\nworld,\"\"\n",
	    "hello,\r\n\r\nworld,\"\""
	  };
	  final String[][] expected = {
	    {"hello", ""},
	    {""}, // ExcelStrategy does not ignore empty lines
	    {"world", ""}
	  };
	  for (String input : inputs) {
	    final CSVParser parser = new CSVParser(new StringReader(input), CSVStrategy.EXCEL_STRATEGY);
	    final String[][] actual = parser.getAllValues();
	    assertEquals(expected.length, actual.length);
	    assertTrue(actual.length > 0);
	    for (int row = 0; row < expected.length; row++) {
	      assertTrue(Arrays.equals(expected[row], actual[row]));
	    }
	  }
	}

	/**
	 * Checks that a parser built without an explicit strategy produces the same
	 * two rows however the last record is terminated: CRLF, LF or nothing, with
	 * the final value either missing or written as an empty quoted string.
	 */
	public void testEndOfFileBehaviorCSV() throws Exception {
	  // Inputs 0-3 cover the CRLF ending, inputs 4-7 the LF ending. The variants
	  // without a trailing line break (5 and 7) repeat inputs 1 and 3.
	  final String[] inputs = {
	    "hello,\r\n\r\nworld,\r\n",
	    "hello,\r\n\r\nworld,",
	    "hello,\r\n\r\nworld,\"\"\r\n",
	    "hello,\r\n\r\nworld,\"\"",
	    "hello,\r\n\r\nworld,\n",
	    "hello,\r\n\r\nworld,",
	    "hello,\r\n\r\nworld,\"\"\n",
	    "hello,\r\n\r\nworld,\"\""
	  };
	  final String[][] expected = {
	    {"hello", ""}, // CSV Strategy ignores empty lines
	    {"world", ""}
	  };
	  for (String input : inputs) {
	    final CSVParser parser = new CSVParser(new StringReader(input));
	    final String[][] actual = parser.getAllValues();
	    assertEquals(expected.length, actual.length);
	    assertTrue(actual.length > 0);
	    for (int row = 0; row < expected.length; row++) {
	      assertTrue(Arrays.equals(expected[row], actual[row]));
	    }
	  }
	}

	/**
	 * Checks that {@code CSVStrategy.EXCEL_STRATEGY} reports the blank lines
	 * after a record as rows holding one empty value, for both CRLF and LF line
	 * endings.
	 */
	public void testEmptyLineBehaviourExcel() throws Exception {
	  final String[] inputs = {
	    "hello,\r\n\r\n\r\n",
	    "hello,\n\n\n",
	    "hello,\"\"\r\n\r\n\r\n",
	    "hello,\"\"\n\n\n"
	  };
	  final String[][] expected = {
	    {"hello", ""},
	    {""}, // ExcelStrategy does not ignore empty lines
	    {""}
	  };
	  for (String input : inputs) {
	    final CSVParser parser = new CSVParser(new StringReader(input), CSVStrategy.EXCEL_STRATEGY);
	    final String[][] actual = parser.getAllValues();
	    assertEquals(expected.length, actual.length);
	    assertTrue(actual.length > 0);
	    for (int row = 0; row < expected.length; row++) {
	      assertTrue(Arrays.equals(expected[row], actual[row]));
	    }
	  }
	}

	/**
	 * Checks that a parser built without an explicit strategy drops the blank
	 * lines after a record, for both CRLF and LF line endings, so that only the
	 * single record remains.
	 */
	public void testEmptyLineBehaviourCSV() throws Exception {
	  final String[] inputs = {
	    "hello,\r\n\r\n\r\n",
	    "hello,\n\n\n",
	    "hello,\"\"\r\n\r\n\r\n",
	    "hello,\"\"\n\n\n"
	  };
	  final String[][] expected = {
	    {"hello", ""} // CSV Strategy ignores empty lines
	  };
	  for (String input : inputs) {
	    final CSVParser parser = new CSVParser(new StringReader(input));
	    final String[][] actual = parser.getAllValues();
	    assertEquals(expected.length, actual.length);
	    assertTrue(actual.length > 0);
	    for (int row = 0; row < expected.length; row++) {
	      assertTrue(Arrays.equals(expected[row], actual[row]));
	    }
	  }
	}

	/**
	 * Earlier backslash-escaping scenario, parsed without an explicit strategy.
	 *
	 * <p>The method name does not start with {@code test}, so the JUnit 3
	 * runner does not pick it up. NOTE(review): presumably disabled on purpose
	 * when {@code testBackslashEscaping} was introduced - confirm before
	 * re-enabling or deleting.
	 */
	public void OLDtestBackslashEscaping() throws IOException {
	  final String input =
	    "one,two,three\n"
	    + "on\\\"e,two\n"
	    + "on\"e,two\n"
	    + "one,\"tw\\\"o\"\n"
	    + "one,\"t\\,wo\"\n"
	    + "one,two,\"th,ree\"\n"
	    + "\"a\\\\\"\n"
	    + "a\\,b\n"
	    + "\"a\\\\,b\"";
	  final String[][] expected = {
	    { "one", "two", "three" },
	    { "on\\\"e", "two" },
	    { "on\"e", "two" },
	    { "one", "tw\"o" },
	    { "one", "t\\,wo" }, // backslash in quotes only escapes a delimiter (",")
	    { "one", "two", "th,ree" },
	    { "a\\\\" }, // backslash in quotes only escapes a delimiter (",")
	    { "a\\", "b" }, // a backslash must be returned
	    { "a\\\\,b" } // backslash in quotes only escapes a delimiter (",")
	  };
	  final String[][] actual = new CSVParser(new StringReader(input)).getAllValues();
	  assertEquals(expected.length, actual.length);
	  assertTrue(actual.length > 0);
	  for (int row = 0; row < expected.length; row++) {
	    assertTrue(Arrays.equals(expected[row], actual[row]));
	  }
	}

	/**
	 * Exercises escape handling together with encapsulation.
	 *
	 * <p>To avoid confusion over the need for escaping chars in java code, the
	 * test uses a forward slash as the escape char and a single quote as the
	 * encapsulator. The numbers in the trailing comments tie each input line to
	 * its expected row.
	 *
	 * <p>The row count is asserted before the rows are compared, so that a
	 * parser returning too few rows fails with a clear assertion instead of an
	 * {@code ArrayIndexOutOfBoundsException}, and surplus rows are not silently
	 * accepted.
	 */
	public void testBackslashEscaping() throws IOException {
	  String code =
	    "one,two,three\n" // 0
	    + "'',''\n" // 1) empty encapsulators
	    + "/',/'\n" // 2) single encapsulators
	    + "'/'','/''\n" // 3) single encapsulators encapsulated via escape
	    + "'''',''''\n" // 4) single encapsulators encapsulated via doubling
	    + "/,,/,\n" // 5) separator escaped
	    + "//,//\n" // 6) escape escaped
	    + "'//','//'\n" // 7) escape escaped in encapsulation
	    + " 8 , \"quoted \"\" /\" // string\" \n" // 8) don't eat spaces
	    + "9, /\n \n" // 9) escaped newline
	    + "";
	  String[][] res = {
	    { "one", "two", "three" }, // 0
	    { "", "" }, // 1
	    { "'", "'" }, // 2
	    { "'", "'" }, // 3
	    { "'", "'" }, // 4
	    { ",", "," }, // 5
	    { "/", "/" }, // 6
	    { "/", "/" }, // 7
	    { " 8 ", " \"quoted \"\" \" / string\" " }, // 8
	    { "9", " \n " }, // 9
	  };

	  // ',' separates values, '\'' encapsulates, comments are off, '/' escapes.
	  CSVStrategy strategy = new CSVStrategy(',','\'',CSVStrategy.COMMENTS_DISABLED,'/',false,false,true,true,"\n");

	  CSVParser parser = new CSVParser(new StringReader(code), strategy);
	  String[][] tmp = parser.getAllValues();
	  assertEquals(res.length, tmp.length);
	  assertTrue(tmp.length > 0);
	  for (int i = 0; i < res.length; i++) {
	    assertTrue("row " + i, Arrays.equals(res[i], tmp[i]));
	  }
	}

	/**
	 * Exercises escape handling when no encapsulator is configured.
	 *
	 * <p>To avoid confusion over the need for escaping chars in java code, the
	 * test uses a forward slash as the escape char. The encapsulator is
	 * disabled ({@code CSVStrategy.ENCAPSULATOR_DISABLED}). The expected rows
	 * show that surrounding whitespace is kept as part of each value.
	 */
	public void testBackslashEscaping2() throws IOException {
	  String code = ""
	    + " , , \n" // 1)
	    + " \t , , \n" // 2)
	    + " // , /, , /,\n" // 3)
	    + "";
	  String[][] res = {
	    { " ", " ", " " }, // 1
	    { " \t ", " ", " " }, // 2
	    { " / ", " , ", " ," }, // 3
	  };

	  CSVStrategy strategy = new CSVStrategy
	    (',', CSVStrategy.ENCAPSULATOR_DISABLED, CSVStrategy.COMMENTS_DISABLED, '/', false, false, true, true, "\n");

	  CSVParser parser = new CSVParser(new StringReader(code), strategy);
	  String[][] tmp = parser.getAllValues();
	  assertTrue(tmp.length > 0);

	  // Assert on the comparison directly rather than failing inside an if block.
	  assertTrue(CSVPrinterTest.equals(res, tmp));
	}


	/**
	 * Parses the same input twice: first with
	 * {@code CSVStrategy.DEFAULT_STRATEGY}, which is asserted to have comments
	 * disabled so that {@code #} is expected back as an ordinary value, and
	 * then with a strategy using {@code #} as the comment start, for which the
	 * last row is expected to hold only the empty value.
	 */
	public void testDefaultStrategy() throws IOException {
	  String code = ""
	    + "a,b\n" // 1)
	    + "\"\n\",\" \"\n" // 2)
	    + "\"\",#\n" // 3)
	    ;
	  String[][] res = {
	    { "a", "b" },
	    { "\n", " " },
	    { "", "#" },
	  };

	  CSVStrategy strategy = CSVStrategy.DEFAULT_STRATEGY;
	  assertEquals(CSVStrategy.COMMENTS_DISABLED, strategy.getCommentStart());

	  CSVParser parser = new CSVParser(new StringReader(code), strategy);
	  String[][] tmp = parser.getAllValues();
	  assertTrue(tmp.length > 0);

	  // Assert on the comparison directly rather than failing inside an if block.
	  assertTrue(CSVPrinterTest.equals(res, tmp));

	  // Expected rows once '#' is configured as the comment start.
	  String[][] resWithComments = {
	    { "a", "b" },
	    { "\n", " " },
	    { ""},
	  };

	  strategy = new CSVStrategy(',','"','#');
	  parser = new CSVParser(new StringReader(code), strategy);
	  tmp = parser.getAllValues();

	  assertTrue(CSVPrinterTest.equals(resWithComments, tmp));
	}


	/**
	 * Enables unicode escape interpretation on a copy of the default strategy
	 * and checks that a value written entirely as six unicode escape sequences
	 * is returned as the word {@code public}.
	 */
	public void testUnicodeEscape() throws IOException {
	  // Work on a clone so the setter is not applied to DEFAULT_STRATEGY itself.
	  final CSVStrategy unicodeAware = (CSVStrategy) CSVStrategy.DEFAULT_STRATEGY.clone();
	  unicodeAware.setUnicodeEscapeInterpretation(true);

	  final String input = "abc,\\u0070\\u0075\\u0062\\u006C\\u0069\\u0063";
	  final String[] fields = new CSVParser(new StringReader(input), unicodeAware).getLine();

	  assertEquals(2, fields.length);
	  assertEquals("abc", fields[0]);
	  assertEquals("public", fields[1]);
	}

	/**
	 * Checks that an input whose records are separated by CRLF, with no line
	 * break after the last record, is split into four rows.
	 */
	public void testCarriageReturnLineFeedEndings() throws IOException {
	  final String input = "foo\r\nbaar,\r\nhello,world\r\n,kanu";
	  final String[][] rows = new CSVParser(new StringReader(input)).getAllValues();
	  assertEquals(4, rows.length);
	}

	/**
	 * Checks that a parser built without an explicit strategy skips blank lines.
	 * The input mixes LF and CRLF blank lines around the three lines
	 * {@code foo,baar}, {@code ,} and {@code ,world}, and exactly three rows
	 * are expected.
	 */
	public void testIgnoreEmptyLines() throws IOException {
	  final String input = "\nfoo,baar\n\r\n,\n\n,world\r\n\n";
	  final String[][] rows = new CSVParser(new StringReader(input)).getAllValues();
	  assertEquals(3, rows.length);
	}

	/**
	 * Checks that the three ways of reading agree with each other. The rows
	 * from {@link CSVParser#getAllValues()} must equal what a second parser
	 * returns line by line through {@link CSVParser#getLine()} and what a
	 * third parser returns value by value through
	 * {@link CSVParser#nextValue()}.
	 */
	public void testLineTokenConsistency() throws IOException {
	  final String input = "\nfoo,baar\n\r\n,\n\n,world\r\n\n";
	  final String[][] rows = new CSVParser(new StringReader(input)).getAllValues();

	  // Independent parsers over the same text, one per access style.
	  final CSVParser valueParser = new CSVParser(new StringReader(input));
	  final CSVParser lineParser = new CSVParser(new StringReader(input));

	  for (String[] row : rows) {
	    assertTrue(Arrays.equals(lineParser.getLine(), row));
	    for (String value : row) {
	      assertEquals(valueParser.nextValue(), value);
	    }
	  }
	}

	/**
	 * From SANDBOX-153: lexes tab-separated input with
	 * {@code CSVStrategy.TDF_STRATEGY}, where the delimiter is itself a
	 * whitespace character. The expected tokens show that the two adjacent tabs
	 * yield an empty token and that the spaces around {@code four},
	 * {@code five} and {@code six} are not part of the token content.
	 */
	public void testDelimiterIsWhitespace() throws IOException {
	  final String input = "one\ttwo\t\tfour \t five\t six";
	  // Token type and content, in the order the lexer must report them.
	  final String[] expectedTokens = {
	    CSVParser.TT_TOKEN + ";one;",
	    CSVParser.TT_TOKEN + ";two;",
	    CSVParser.TT_TOKEN + ";;",
	    CSVParser.TT_TOKEN + ";four;",
	    CSVParser.TT_TOKEN + ";five;",
	    CSVParser.TT_EOF + ";six;"
	  };
	  final TestCSVParser lexer = new TestCSVParser(new StringReader(input), CSVStrategy.TDF_STRATEGY);
	  for (String expectedToken : expectedTokens) {
	    assertEquals(expectedToken, lexer.testNextToken());
	  }
	}
	}