| /** |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.hadoop.mapreduce.lib.partition; |
| |
| import org.apache.commons.logging.Log; |
| import org.apache.commons.logging.LogFactory; |
| import org.junit.Test; |
| |
| import static org.junit.Assert.assertEquals; |
| import static org.junit.Assert.assertTrue; |
| |
| public class TestKeyFieldHelper { |
| private static final Log LOG = LogFactory.getLog(TestKeyFieldHelper.class); |
| /** |
| * Test is key-field-helper's parse option. |
| */ |
| @Test |
| public void testparseOption() throws Exception { |
| KeyFieldHelper helper = new KeyFieldHelper(); |
| helper.setKeyFieldSeparator("\t"); |
| String keySpecs = "-k1.2,3.4"; |
| String eKeySpecs = keySpecs; |
| helper.parseOption(keySpecs); |
| String actKeySpecs = helper.keySpecs().get(0).toString(); |
| assertEquals("KeyFieldHelper's parsing is garbled", eKeySpecs, actKeySpecs); |
| |
| // test -k a.b |
| keySpecs = "-k 1.2"; |
| eKeySpecs = "-k1.2,0.0"; |
| helper = new KeyFieldHelper(); |
| helper.parseOption(keySpecs); |
| actKeySpecs = helper.keySpecs().get(0).toString(); |
| assertEquals("KeyFieldHelper's parsing is garbled", eKeySpecs, actKeySpecs); |
| |
| keySpecs = "-nr -k1.2,3.4"; |
| eKeySpecs = "-k1.2,3.4nr"; |
| helper = new KeyFieldHelper(); |
| helper.parseOption(keySpecs); |
| actKeySpecs = helper.keySpecs().get(0).toString(); |
| assertEquals("KeyFieldHelper's parsing is garbled", eKeySpecs, actKeySpecs); |
| |
| keySpecs = "-nr -k1.2,3.4n"; |
| eKeySpecs = "-k1.2,3.4n"; |
| helper = new KeyFieldHelper(); |
| helper.parseOption(keySpecs); |
| actKeySpecs = helper.keySpecs().get(0).toString(); |
| assertEquals("KeyFieldHelper's parsing is garbled", eKeySpecs, actKeySpecs); |
| |
| keySpecs = "-nr -k1.2,3.4r"; |
| eKeySpecs = "-k1.2,3.4r"; |
| helper = new KeyFieldHelper(); |
| helper.parseOption(keySpecs); |
| actKeySpecs = helper.keySpecs().get(0).toString(); |
| assertEquals("KeyFieldHelper's parsing is garbled", eKeySpecs, actKeySpecs); |
| |
| keySpecs = "-nr -k1.2,3.4 -k5.6,7.8n -k9.10,11.12r -k13.14,15.16nr"; |
| //1st |
| eKeySpecs = "-k1.2,3.4nr"; |
| helper = new KeyFieldHelper(); |
| helper.parseOption(keySpecs); |
| actKeySpecs = helper.keySpecs().get(0).toString(); |
| assertEquals("KeyFieldHelper's parsing is garbled", eKeySpecs, actKeySpecs); |
| // 2nd |
| eKeySpecs = "-k5.6,7.8n"; |
| actKeySpecs = helper.keySpecs().get(1).toString(); |
| assertEquals("KeyFieldHelper's parsing is garbled", eKeySpecs, actKeySpecs); |
| //3rd |
| eKeySpecs = "-k9.10,11.12r"; |
| actKeySpecs = helper.keySpecs().get(2).toString(); |
| assertEquals("KeyFieldHelper's parsing is garbled", eKeySpecs, actKeySpecs); |
| //4th |
| eKeySpecs = "-k13.14,15.16nr"; |
| actKeySpecs = helper.keySpecs().get(3).toString(); |
| assertEquals("KeyFieldHelper's parsing is garbled", eKeySpecs, actKeySpecs); |
| |
| keySpecs = "-k1.2n,3.4"; |
| eKeySpecs = "-k1.2,3.4n"; |
| helper = new KeyFieldHelper(); |
| helper.parseOption(keySpecs); |
| actKeySpecs = helper.keySpecs().get(0).toString(); |
| assertEquals("KeyFieldHelper's parsing is garbled", eKeySpecs, actKeySpecs); |
| |
| keySpecs = "-k1.2r,3.4"; |
| eKeySpecs = "-k1.2,3.4r"; |
| helper = new KeyFieldHelper(); |
| helper.parseOption(keySpecs); |
| actKeySpecs = helper.keySpecs().get(0).toString(); |
| assertEquals("KeyFieldHelper's parsing is garbled", eKeySpecs, actKeySpecs); |
| |
| keySpecs = "-k1.2nr,3.4"; |
| eKeySpecs = "-k1.2,3.4nr"; |
| helper = new KeyFieldHelper(); |
| helper.parseOption(keySpecs); |
| actKeySpecs = helper.keySpecs().get(0).toString(); |
| assertEquals("KeyFieldHelper's parsing is garbled", eKeySpecs, actKeySpecs); |
| |
| keySpecs = "-k1.2,3.4n"; |
| eKeySpecs = "-k1.2,3.4n"; |
| helper = new KeyFieldHelper(); |
| helper.parseOption(keySpecs); |
| actKeySpecs = helper.keySpecs().get(0).toString(); |
| assertEquals("KeyFieldHelper's parsing is garbled", eKeySpecs, actKeySpecs); |
| |
| keySpecs = "-k1.2,3.4r"; |
| eKeySpecs = "-k1.2,3.4r"; |
| helper = new KeyFieldHelper(); |
| helper.parseOption(keySpecs); |
| actKeySpecs = helper.keySpecs().get(0).toString(); |
| assertEquals("KeyFieldHelper's parsing is garbled", eKeySpecs, actKeySpecs); |
| |
| keySpecs = "-k1.2,3.4nr"; |
| eKeySpecs = "-k1.2,3.4nr"; |
| helper = new KeyFieldHelper(); |
| helper.parseOption(keySpecs); |
| actKeySpecs = helper.keySpecs().get(0).toString(); |
| assertEquals("KeyFieldHelper's parsing is garbled", eKeySpecs, actKeySpecs); |
| |
| keySpecs = "-nr -k1.2,3.4 -k5.6,7.8"; |
| eKeySpecs = "-k1.2,3.4nr"; |
| helper = new KeyFieldHelper(); |
| helper.parseOption(keySpecs); |
| actKeySpecs = helper.keySpecs().get(0).toString(); |
| assertEquals("KeyFieldHelper's parsing is garbled", eKeySpecs, actKeySpecs); |
| eKeySpecs = "-k5.6,7.8nr"; |
| actKeySpecs = helper.keySpecs().get(1).toString(); |
| assertEquals("KeyFieldHelper's parsing is garbled", eKeySpecs, actKeySpecs); |
| |
| keySpecs = "-n -k1.2,3.4 -k5.6,7.8"; |
| eKeySpecs = "-k1.2,3.4n"; |
| helper = new KeyFieldHelper(); |
| helper.parseOption(keySpecs); |
| actKeySpecs = helper.keySpecs().get(0).toString(); |
| assertEquals("KeyFieldHelper's parsing is garbled", eKeySpecs, actKeySpecs); |
| eKeySpecs = "-k5.6,7.8n"; |
| actKeySpecs = helper.keySpecs().get(1).toString(); |
| assertEquals("KeyFieldHelper's parsing is garbled", eKeySpecs, actKeySpecs); |
| |
| keySpecs = "-r -k1.2,3.4 -k5.6,7.8"; |
| eKeySpecs = "-k1.2,3.4r"; |
| helper = new KeyFieldHelper(); |
| helper.parseOption(keySpecs); |
| actKeySpecs = helper.keySpecs().get(0).toString(); |
| assertEquals("KeyFieldHelper's parsing is garbled", eKeySpecs, actKeySpecs); |
| eKeySpecs = "-k5.6,7.8r"; |
| actKeySpecs = helper.keySpecs().get(1).toString(); |
| assertEquals("KeyFieldHelper's parsing is garbled", eKeySpecs, actKeySpecs); |
| |
| keySpecs = "-k1.2,3.4n -k5.6,7.8"; |
| eKeySpecs = "-k1.2,3.4n"; |
| helper = new KeyFieldHelper(); |
| helper.parseOption(keySpecs); |
| actKeySpecs = helper.keySpecs().get(0).toString(); |
| assertEquals("KeyFieldHelper's parsing is garbled", eKeySpecs, actKeySpecs); |
| eKeySpecs = "-k5.6,7.8"; |
| actKeySpecs = helper.keySpecs().get(1).toString(); |
| assertEquals("KeyFieldHelper's parsing is garbled", eKeySpecs, actKeySpecs); |
| |
| keySpecs = "-k1.2,3.4r -k5.6,7.8"; |
| eKeySpecs = "-k1.2,3.4r"; |
| helper = new KeyFieldHelper(); |
| helper.parseOption(keySpecs); |
| actKeySpecs = helper.keySpecs().get(0).toString(); |
| assertEquals("KeyFieldHelper's parsing is garbled", eKeySpecs, actKeySpecs); |
| eKeySpecs = "-k5.6,7.8"; |
| actKeySpecs = helper.keySpecs().get(1).toString(); |
| assertEquals("KeyFieldHelper's parsing is garbled", eKeySpecs, actKeySpecs); |
| |
| keySpecs = "-k1.2,3.4nr -k5.6,7.8"; |
| eKeySpecs = "-k1.2,3.4nr"; |
| helper = new KeyFieldHelper(); |
| helper.parseOption(keySpecs); |
| actKeySpecs = helper.keySpecs().get(0).toString(); |
| assertEquals("KeyFieldHelper's parsing is garbled", eKeySpecs, actKeySpecs); |
| eKeySpecs = "-k5.6,7.8"; |
| actKeySpecs = helper.keySpecs().get(1).toString(); |
| assertEquals("KeyFieldHelper's parsing is garbled", eKeySpecs, actKeySpecs); |
| |
| keySpecs = "-n"; |
| eKeySpecs = "-k1.1,0.0n"; |
| helper = new KeyFieldHelper(); |
| helper.parseOption(keySpecs); |
| actKeySpecs = helper.keySpecs().get(0).toString(); |
| assertEquals("KeyFieldHelper's parsing is garbled", eKeySpecs, actKeySpecs); |
| |
| keySpecs = "-r"; |
| eKeySpecs = "-k1.1,0.0r"; |
| helper = new KeyFieldHelper(); |
| helper.parseOption(keySpecs); |
| actKeySpecs = helper.keySpecs().get(0).toString(); |
| assertEquals("KeyFieldHelper's parsing is garbled", eKeySpecs, actKeySpecs); |
| |
| keySpecs = "-nr"; |
| eKeySpecs = "-k1.1,0.0nr"; |
| helper = new KeyFieldHelper(); |
| helper.parseOption(keySpecs); |
| actKeySpecs = helper.keySpecs().get(0).toString(); |
| assertEquals("KeyFieldHelper's parsing is garbled", eKeySpecs, actKeySpecs); |
| } |
| |
| /** |
| * Test is key-field-helper's getWordLengths. |
| */ |
| @Test |
| public void testGetWordLengths() throws Exception { |
| KeyFieldHelper helper = new KeyFieldHelper(); |
| helper.setKeyFieldSeparator("\t"); |
| // test getWordLengths with unspecified key-specifications |
| String input = "hi"; |
| int[] result = helper.getWordLengths(input.getBytes(), 0, 2); |
| assertTrue(equals(result, new int[] {1})); |
| |
| // set the key specs |
| helper.setKeyFieldSpec(1, 2); |
| |
| // test getWordLengths with 3 words |
| input = "hi\thello there"; |
| result = helper.getWordLengths(input.getBytes(), 0, input.length()); |
| assertTrue(equals(result, new int[] {2, 2, 11})); |
| |
| // test getWordLengths with 4 words but with a different separator |
| helper.setKeyFieldSeparator(" "); |
| input = "hi hello\tthere you"; |
| result = helper.getWordLengths(input.getBytes(), 0, input.length()); |
| assertTrue(equals(result, new int[] {3, 2, 11, 3})); |
| |
| // test with non zero start index |
| input = "hi hello there you where me there"; |
| // ..................... |
| result = helper.getWordLengths(input.getBytes(), 10, 33); |
| assertTrue(equals(result, new int[] {5, 4, 3, 5, 2, 3})); |
| |
| input = "hi hello there you where me "; |
| // .................. |
| result = helper.getWordLengths(input.getBytes(), 10, input.length()); |
| assertTrue(equals(result, new int[] {5, 4, 3, 5, 2, 0})); |
| |
| input = ""; |
| result = helper.getWordLengths(input.getBytes(), 0, 0); |
| assertTrue(equals(result, new int[] {1, 0})); |
| |
| input = " abc"; |
| result = helper.getWordLengths(input.getBytes(), 0, 5); |
| assertTrue(equals(result, new int[] {3, 0, 0, 3})); |
| |
| input = " abc"; |
| result = helper.getWordLengths(input.getBytes(), 0, 2); |
| assertTrue(equals(result, new int[] {3, 0, 0, 0})); |
| |
| input = " abc "; |
| result = helper.getWordLengths(input.getBytes(), 0, 2); |
| assertTrue(equals(result, new int[] {2, 0, 1})); |
| |
| helper.setKeyFieldSeparator("abcd"); |
| input = "abc"; |
| result = helper.getWordLengths(input.getBytes(), 0, 3); |
| assertTrue(equals(result, new int[] {1, 3})); |
| } |
| |
| /** |
| * Test is key-field-helper's getStartOffset/getEndOffset. |
| */ |
| @Test |
| public void testgetStartEndOffset() throws Exception { |
| KeyFieldHelper helper = new KeyFieldHelper(); |
| helper.setKeyFieldSeparator("\t"); |
| // test getStartOffset with -k1,2 |
| helper.setKeyFieldSpec(1, 2); |
| String input = "hi\thello"; |
| String expectedOutput = input; |
| testKeySpecs(input, expectedOutput, helper); |
| |
| // test getStartOffset with -k1.0,0 .. should result into start = -1 |
| helper = new KeyFieldHelper(); |
| helper.setKeyFieldSeparator("\t"); |
| helper.parseOption("-k1.0,0"); |
| testKeySpecs(input, null, helper); |
| |
| // test getStartOffset with -k1,0 |
| helper = new KeyFieldHelper(); |
| helper.setKeyFieldSeparator("\t"); |
| helper.parseOption("-k1,0"); |
| expectedOutput = input; |
| testKeySpecs(input, expectedOutput, helper); |
| |
| // test getStartOffset with -k1.2,0 |
| helper = new KeyFieldHelper(); |
| helper.setKeyFieldSeparator("\t"); |
| helper.parseOption("-k1.2,0"); |
| expectedOutput = "i\thello"; |
| testKeySpecs(input, expectedOutput, helper); |
| |
| // test getWordLengths with -k1.0,2.3 |
| helper = new KeyFieldHelper(); |
| helper.setKeyFieldSeparator("\t"); |
| helper.parseOption("-k1.1,2.3"); |
| expectedOutput = "hi\thel"; |
| testKeySpecs(input, expectedOutput, helper); |
| |
| // test getWordLengths with -k1.2,2.3 |
| helper = new KeyFieldHelper(); |
| helper.setKeyFieldSeparator("\t"); |
| helper.parseOption("-k1.2,2.3"); |
| expectedOutput = "i\thel"; |
| testKeySpecs(input, expectedOutput, helper); |
| |
| // test getStartOffset with -k1.2,3.0 |
| helper = new KeyFieldHelper(); |
| helper.setKeyFieldSeparator("\t"); |
| helper.parseOption("-k1.2,3.0"); |
| expectedOutput = "i\thello"; |
| testKeySpecs(input, expectedOutput, helper); |
| |
| // test getStartOffset with -k2,2 |
| helper = new KeyFieldHelper(); |
| helper.setKeyFieldSeparator("\t"); |
| helper.parseOption("-k2,2"); |
| expectedOutput = "hello"; |
| testKeySpecs(input, expectedOutput, helper); |
| |
| // test getStartOffset with -k3.0,4.0 |
| helper = new KeyFieldHelper(); |
| helper.setKeyFieldSeparator("\t"); |
| helper.parseOption("-k3.1,4.0"); |
| testKeySpecs(input, null, helper); |
| |
| // test getStartOffset with -k2.1 |
| helper = new KeyFieldHelper(); |
| input = "123123123123123hi\thello\thow"; |
| helper.setKeyFieldSeparator("\t"); |
| helper.parseOption("-k2.1"); |
| expectedOutput = "hello\thow"; |
| testKeySpecs(input, expectedOutput, helper, 15, input.length()); |
| |
| // test getStartOffset with -k2.1,4 with end ending on \t |
| helper = new KeyFieldHelper(); |
| input = "123123123123123hi\thello\t\thow\tare"; |
| helper.setKeyFieldSeparator("\t"); |
| helper.parseOption("-k2.1,3"); |
| expectedOutput = "hello\t"; |
| testKeySpecs(input, expectedOutput, helper, 17, input.length()); |
| |
| // test getStartOffset with -k2.1 with end ending on \t |
| helper = new KeyFieldHelper(); |
| input = "123123123123123hi\thello\thow\tare"; |
| helper.setKeyFieldSeparator("\t"); |
| helper.parseOption("-k2.1"); |
| expectedOutput = "hello\thow\t"; |
| testKeySpecs(input, expectedOutput, helper, 17, 28); |
| |
| // test getStartOffset with -k2.1,3 with smaller length |
| helper = new KeyFieldHelper(); |
| input = "123123123123123hi\thello\thow"; |
| helper.setKeyFieldSeparator("\t"); |
| helper.parseOption("-k2.1,3"); |
| expectedOutput = "hello"; |
| testKeySpecs(input, expectedOutput, helper, 15, 23); |
| } |
| |
| private void testKeySpecs(String input, String expectedOutput, |
| KeyFieldHelper helper) { |
| testKeySpecs(input, expectedOutput, helper, 0, -1); |
| } |
| |
| private void testKeySpecs(String input, String expectedOutput, |
| KeyFieldHelper helper, int s1, int e1) { |
| LOG.info("input : " + input); |
| String keySpecs = helper.keySpecs().get(0).toString(); |
| LOG.info("keyspecs : " + keySpecs); |
| byte[] inputBytes = input.getBytes(); // get the input bytes |
| if (e1 == -1) { |
| e1 = inputBytes.length; |
| } |
| LOG.info("length : " + e1); |
| // get the word lengths |
| int[] indices = helper.getWordLengths(inputBytes, s1, e1); |
| // get the start index |
| int start = helper.getStartOffset(inputBytes, s1, e1, indices, |
| helper.keySpecs().get(0)); |
| LOG.info("start : " + start); |
| if (expectedOutput == null) { |
| assertEquals("Expected -1 when the start index is invalid", -1, start); |
| return; |
| } |
| // get the end index |
| int end = helper.getEndOffset(inputBytes, s1, e1, indices, |
| helper.keySpecs().get(0)); |
| LOG.info("end : " + end); |
| //my fix |
| end = (end >= inputBytes.length) ? inputBytes.length -1 : end; |
| int length = end + 1 - start; |
| LOG.info("length : " + length); |
| byte[] outputBytes = new byte[length]; |
| System.arraycopy(inputBytes, start, outputBytes, 0, length); |
| String output = new String(outputBytes); |
| LOG.info("output : " + output); |
| LOG.info("expected-output : " + expectedOutput); |
| assertEquals(keySpecs + " failed on input '" + input + "'", |
| expectedOutput, output); |
| } |
| |
| // check for equality of 2 int arrays |
| private boolean equals(int[] test, int[] expected) { |
| // check array length |
| if (test[0] != expected[0]) { |
| return false; |
| } |
| // if length is same then check the contents |
| for (int i = 0; i < test[0] && i < expected[0]; ++i) { |
| if (test[i] != expected[i]) { |
| return false; |
| } |
| } |
| return true; |
| } |
| } |