hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestLineReader.java - hadoop - Git at Google

 /**
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 package org.apache.hadoop.util;

 import java.io.ByteArrayInputStream;

 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.util.LineReader;
 import org.junit.Test;

 import org.junit.Assert;

 public class TestLineReader {
   private LineReader lineReader;
   private String TestData;
   private String Delimiter;
   private Text line;

   @Test
   public void testCustomDelimiter() throws Exception {
     /* TEST_1
      * The test scenario is the tail of the buffer
      * equals the starting character/s of delimiter
      *
      * The Test Data is such that,
      *
      * 1) we will have "</entity>" as delimiter
      *
      * 2) The tail of the current buffer would be "</"
      *    which matches with the starting character sequence of delimiter.
      *
      * 3) The Head of the next buffer would be   "id>"
      *    which does NOT match with the remaining characters of delimiter.
      *
      * 4) Input data would be prefixed by char 'a'
      *    about numberOfCharToFillTheBuffer times.
      *    So that, one iteration to buffer the input data,
      *    would end at '</' ie equals starting 2 char of delimiter
      *
      * 5) For this we would take BufferSize as 64 * 1024;
      *
      * Check Condition
      *  In the second key value pair, the value should contain
      *  "</"  from currentToken and
      *  "id>" from next token
      */

     Delimiter="</entity>";

     String CurrentBufferTailToken=
         "</entity><entity><id>Gelesh</";
     // Ending part of Input Data Buffer
     // It contains '</' ie delimiter character

     String NextBufferHeadToken=
         "id><name>Omathil</name></entity>";
     // Supposing the start of next buffer is this

     String Expected =
         (CurrentBufferTailToken+NextBufferHeadToken)
         .replace(Delimiter, "");
     // Expected ,must capture from both the buffer, excluding Delimiter

     String TestPartOfInput = CurrentBufferTailToken+NextBufferHeadToken;

     int BufferSize=64 * 1024;
     int numberOfCharToFillTheBuffer =
             BufferSize - CurrentBufferTailToken.length();
     StringBuilder fillerString=new StringBuilder();
     for (int i=0; i<numberOfCharToFillTheBuffer; i++) {
       fillerString.append('a'); // char 'a' as a filler for the test string
     }

     TestData = fillerString + TestPartOfInput;
     lineReader = new LineReader(
         new ByteArrayInputStream(TestData.getBytes()), Delimiter.getBytes());

     line = new Text();

     lineReader.readLine(line);
     Assert.assertEquals(fillerString.toString(), line.toString());

     lineReader.readLine(line);
     Assert.assertEquals(Expected, line.toString());

     /*TEST_2
      * The test scenario is such that,
      * the character/s preceding the delimiter,
      * equals the starting character/s of delimiter
      */

     Delimiter = "record";
     StringBuilder TestStringBuilder = new StringBuilder();

     TestStringBuilder.append(Delimiter + "Kerala ");
     TestStringBuilder.append(Delimiter + "Bangalore");
     TestStringBuilder.append(Delimiter + " North Korea");
     TestStringBuilder.append(Delimiter + Delimiter+
                         "Guantanamo");
     TestStringBuilder.append(Delimiter + "ecord"
             + "recor" + "core"); //~EOF with 're'

     TestData=TestStringBuilder.toString();

     lineReader = new LineReader(
         new ByteArrayInputStream(TestData.getBytes()), Delimiter.getBytes());

     lineReader.readLine(line);
     Assert.assertEquals("", line.toString());
     lineReader.readLine(line);
     Assert.assertEquals("Kerala ", line.toString());

     lineReader.readLine(line);
     Assert.assertEquals("Bangalore", line.toString());

     lineReader.readLine(line);
     Assert.assertEquals(" North Korea", line.toString());

     lineReader.readLine(line);
     Assert.assertEquals("", line.toString());
     lineReader.readLine(line);
     Assert.assertEquals("Guantanamo", line.toString());

     lineReader.readLine(line);
     Assert.assertEquals(("ecord"+"recor"+"core"), line.toString());

     // Test 3
     // The test scenario is such that,
     // aaaabccc split by aaab
     TestData = "aaaabccc";
     Delimiter = "aaab";
     lineReader = new LineReader(
         new ByteArrayInputStream(TestData.getBytes()), Delimiter.getBytes());

     lineReader.readLine(line);
     Assert.assertEquals("a", line.toString());
     lineReader.readLine(line);
     Assert.assertEquals("ccc", line.toString());
   }
 }
	/**
	* Licensed to the Apache Software Foundation (ASF) under one
	* or more contributor license agreements. See the NOTICE file
	* distributed with this work for additional information
	* regarding copyright ownership. The ASF licenses this file
	* to you under the Apache License, Version 2.0 (the
	* "License"); you may not use this file except in compliance
	* with the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	package org.apache.hadoop.util;

	import java.io.ByteArrayInputStream;

	import org.apache.hadoop.io.Text;
	import org.apache.hadoop.util.LineReader;
	import org.junit.Test;

	import org.junit.Assert;

	public class TestLineReader {
	private LineReader lineReader;
	private String TestData;
	private String Delimiter;
	private Text line;

	@Test
	public void testCustomDelimiter() throws Exception {
	/* TEST_1
	* The test scenario is the tail of the buffer
	* equals the starting character/s of delimiter
	*
	* The Test Data is such that,
	*
	* 1) we will have "</entity>" as delimiter
	*
	* 2) The tail of the current buffer would be "</"
	* which matches with the starting character sequence of delimiter.
	*
	* 3) The Head of the next buffer would be "id>"
	* which does NOT match with the remaining characters of delimiter.
	*
	* 4) Input data would be prefixed by char 'a'
	* about numberOfCharToFillTheBuffer times.
	* So that, one iteration to buffer the input data,
	* would end at '</' ie equals starting 2 char of delimiter
	*
	* 5) For this we would take BufferSize as 64 * 1024;
	*
	* Check Condition
	* In the second key value pair, the value should contain
	* "</" from currentToken and
	* "id>" from next token
	*/

	Delimiter="</entity>";

	String CurrentBufferTailToken=
	"</entity><entity><id>Gelesh</";
	// Ending part of Input Data Buffer
	// It contains '</' ie delimiter character

	String NextBufferHeadToken=
	"id><name>Omathil</name></entity>";
	// Supposing the start of next buffer is this

	String Expected =
	(CurrentBufferTailToken+NextBufferHeadToken)
	.replace(Delimiter, "");
	// Expected ,must capture from both the buffer, excluding Delimiter

	String TestPartOfInput = CurrentBufferTailToken+NextBufferHeadToken;

	int BufferSize=64 * 1024;
	int numberOfCharToFillTheBuffer =
	BufferSize - CurrentBufferTailToken.length();
	StringBuilder fillerString=new StringBuilder();
	for (int i=0; i<numberOfCharToFillTheBuffer; i++) {
	fillerString.append('a'); // char 'a' as a filler for the test string
	}

	TestData = fillerString + TestPartOfInput;
	lineReader = new LineReader(
	new ByteArrayInputStream(TestData.getBytes()), Delimiter.getBytes());

	line = new Text();

	lineReader.readLine(line);
	Assert.assertEquals(fillerString.toString(), line.toString());

	lineReader.readLine(line);
	Assert.assertEquals(Expected, line.toString());

	/*TEST_2
	* The test scenario is such that,
	* the character/s preceding the delimiter,
	* equals the starting character/s of delimiter
	*/

	Delimiter = "record";
	StringBuilder TestStringBuilder = new StringBuilder();

	TestStringBuilder.append(Delimiter + "Kerala ");
	TestStringBuilder.append(Delimiter + "Bangalore");
	TestStringBuilder.append(Delimiter + " North Korea");
	TestStringBuilder.append(Delimiter + Delimiter+
	"Guantanamo");
	TestStringBuilder.append(Delimiter + "ecord"
	+ "recor" + "core"); //~EOF with 're'

	TestData=TestStringBuilder.toString();

	lineReader = new LineReader(
	new ByteArrayInputStream(TestData.getBytes()), Delimiter.getBytes());

	lineReader.readLine(line);
	Assert.assertEquals("", line.toString());
	lineReader.readLine(line);
	Assert.assertEquals("Kerala ", line.toString());

	lineReader.readLine(line);
	Assert.assertEquals("Bangalore", line.toString());

	lineReader.readLine(line);
	Assert.assertEquals(" North Korea", line.toString());

	lineReader.readLine(line);
	Assert.assertEquals("", line.toString());
	lineReader.readLine(line);
	Assert.assertEquals("Guantanamo", line.toString());

	lineReader.readLine(line);
	Assert.assertEquals(("ecord"+"recor"+"core"), line.toString());

	// Test 3
	// The test scenario is such that,
	// aaaabccc split by aaab
	TestData = "aaaabccc";
	Delimiter = "aaab";
	lineReader = new LineReader(
	new ByteArrayInputStream(TestData.getBytes()), Delimiter.getBytes());

	lineReader.readLine(line);
	Assert.assertEquals("a", line.toString());
	lineReader.readLine(line);
	Assert.assertEquals("ccc", line.toString());
	}
	}