blob: cbb06eabf05cf78ee4c62419647aa0f80685a998 [file] [log] [blame]
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.util;
import java.io.ByteArrayInputStream;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.util.LineReader;
import org.junit.Test;
import junit.framework.Assert;
public class TestLineReader {
private LineReader lineReader;
private String TestData;
private String Delimiter;
private Text line;
@Test
public void testCustomDelimiter() throws Exception {
/* TEST_1
* The test scenario is the tail of the buffer
* equals the starting character/s of delimiter
*
* The Test Data is such that,
*
* 1) we will have "</entity>" as delimiter
*
* 2) The tail of the current buffer would be "</"
* which matches with the starting character sequence of delimiter.
*
* 3) The Head of the next buffer would be "id>"
* which does NOT match with the remaining characters of delimiter.
*
* 4) Input data would be prefixed by char 'a'
* about numberOfCharToFillTheBuffer times.
* So that, one iteration to buffer the input data,
* would end at '</' ie equals starting 2 char of delimiter
*
* 5) For this we would take BufferSize as 64 * 1024;
*
* Check Condition
* In the second key value pair, the value should contain
* "</" from currentToken and
* "id>" from next token
*/
Delimiter="</entity>";
String CurrentBufferTailToken=
"</entity><entity><id>Gelesh</";
// Ending part of Input Data Buffer
// It contains '</' ie delimiter character
String NextBufferHeadToken=
"id><name>Omathil</name></entity>";
// Supposing the start of next buffer is this
String Expected =
(CurrentBufferTailToken+NextBufferHeadToken)
.replace(Delimiter, "");
// Expected ,must capture from both the buffer, excluding Delimiter
String TestPartOfInput = CurrentBufferTailToken+NextBufferHeadToken;
int BufferSize=64 * 1024;
int numberOfCharToFillTheBuffer=BufferSize-CurrentBufferTailToken.length();
StringBuilder fillerString=new StringBuilder();
for (int i=0;i<numberOfCharToFillTheBuffer;i++) {
fillerString.append('a'); // char 'a' as a filler for the test string
}
TestData = fillerString + TestPartOfInput;
lineReader = new LineReader(
new ByteArrayInputStream(TestData.getBytes()),Delimiter.getBytes());
line = new Text();
lineReader.readLine(line);
Assert.assertEquals(fillerString.toString(),line.toString());
lineReader.readLine(line);
Assert.assertEquals(Expected, line.toString());
/*TEST_2
* The test scenario is such that,
* the character/s preceding the delimiter,
* equals the starting character/s of delimiter
*/
Delimiter = "record";
StringBuilder TestStringBuilder = new StringBuilder();
TestStringBuilder.append(Delimiter+"Kerala ");
TestStringBuilder.append(Delimiter+"Bangalore");
TestStringBuilder.append(Delimiter+" North Korea");
TestStringBuilder.append(Delimiter+Delimiter+
"Guantanamo");
TestStringBuilder.append(Delimiter+"ecord"+"recor"+"core"); //~EOF with 're'
TestData=TestStringBuilder.toString();
lineReader = new LineReader(
new ByteArrayInputStream(TestData.getBytes()),Delimiter.getBytes());
lineReader.readLine(line);
Assert.assertEquals("",line.toString());
lineReader.readLine(line);
Assert.assertEquals("Kerala ",line.toString());
lineReader.readLine(line);
Assert.assertEquals("Bangalore",line.toString());
lineReader.readLine(line);
Assert.assertEquals(" North Korea",line.toString());
lineReader.readLine(line);
Assert.assertEquals("",line.toString());
lineReader.readLine(line);
Assert.assertEquals("Guantanamo",line.toString());
lineReader.readLine(line);
Assert.assertEquals(("ecord"+"recor"+"core"),line.toString());
}
}