| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| |
| package org.apache.flink.types.parser; |
| |
| import org.apache.flink.configuration.ConfigConstants; |
| import org.apache.flink.types.StringValue; |
| import org.apache.flink.types.Value; |
| import org.junit.Test; |
| |
| import java.nio.charset.Charset; |
| import java.nio.charset.StandardCharsets; |
| |
| import static org.junit.Assert.assertEquals; |
| import static org.junit.Assert.assertTrue; |
| |
| public class VarLengthStringParserTest { |
| |
| public StringValueParser parser = new StringValueParser(); |
| |
| @Test |
| public void testGetValue() { |
| Value v = parser.createValue(); |
| assertTrue(v instanceof StringValue); |
| } |
| |
| @Test |
| public void testParseValidUnquotedStrings() { |
| |
| this.parser = new StringValueParser(); |
| |
| // check valid strings with out whitespaces and trailing delimiter |
| byte[] recBytes = "abcdefgh|i|jklmno|".getBytes(ConfigConstants.DEFAULT_CHARSET); |
| StringValue s = new StringValue(); |
| |
| int startPos = 0; |
| startPos = parser.parseField(recBytes, startPos, recBytes.length, new byte[] {'|'}, s); |
| assertTrue(startPos == 9); |
| assertTrue(s.getValue().equals("abcdefgh")); |
| |
| startPos = parser.parseField(recBytes, startPos, recBytes.length, new byte[] {'|'}, s); |
| assertTrue(startPos == 11); |
| assertTrue(s.getValue().equals("i")); |
| |
| startPos = parser.parseField(recBytes, startPos, recBytes.length, new byte[] {'|'}, s); |
| assertTrue(startPos == 18); |
| assertTrue(s.getValue().equals("jklmno")); |
| |
| |
| // check single field not terminated |
| recBytes = "abcde".getBytes(ConfigConstants.DEFAULT_CHARSET); |
| startPos = 0; |
| startPos = parser.parseField(recBytes, startPos, recBytes.length, new byte[] {'|'}, s); |
| assertTrue(startPos == 5); |
| assertTrue(s.getValue().equals("abcde")); |
| |
| // check last field not terminated |
| recBytes = "abcde|fg".getBytes(ConfigConstants.DEFAULT_CHARSET); |
| startPos = 0; |
| startPos = parser.parseField(recBytes, startPos, recBytes.length, new byte[] {'|'}, s); |
| assertTrue(startPos == 6); |
| assertTrue(s.getValue().equals("abcde")); |
| |
| startPos = parser.parseField(recBytes, startPos, recBytes.length, new byte[] {'|'}, s); |
| assertTrue(startPos == 8); |
| assertTrue(s.getValue().equals("fg")); |
| } |
| |
| @Test |
| public void testParseValidQuotedStrings() { |
| |
| this.parser = new StringValueParser(); |
| this.parser.enableQuotedStringParsing((byte)'"'); |
| |
| // check valid strings with out whitespaces and trailing delimiter |
| byte[] recBytes = "\"abcdefgh\"|\"i\"|\"jklmno\"|".getBytes(ConfigConstants.DEFAULT_CHARSET); |
| StringValue s = new StringValue(); |
| |
| int startPos = 0; |
| startPos = parser.parseField(recBytes, startPos, recBytes.length, new byte[] {'|'}, s); |
| assertTrue(startPos == 11); |
| assertTrue(s.getValue().equals("abcdefgh")); |
| |
| startPos = parser.parseField(recBytes, startPos, recBytes.length, new byte[] {'|'}, s); |
| assertTrue(startPos == 15); |
| assertTrue(s.getValue().equals("i")); |
| |
| startPos = parser.parseField(recBytes, startPos, recBytes.length, new byte[] {'|'}, s); |
| assertTrue(startPos == 24); |
| assertTrue(s.getValue().equals("jklmno")); |
| |
| |
| // check single field not terminated |
| recBytes = "\"abcde\"".getBytes(ConfigConstants.DEFAULT_CHARSET); |
| startPos = 0; |
| startPos = parser.parseField(recBytes, startPos, recBytes.length, new byte[] {'|'}, s); |
| assertTrue(startPos == 7); |
| assertTrue(s.getValue().equals("abcde")); |
| |
| // check last field not terminated |
| recBytes = "\"abcde\"|\"fg\"".getBytes(ConfigConstants.DEFAULT_CHARSET); |
| startPos = 0; |
| startPos = parser.parseField(recBytes, startPos, recBytes.length, new byte[] {'|'}, s); |
| assertTrue(startPos == 8); |
| assertTrue(s.getValue().equals("abcde")); |
| |
| startPos = parser.parseField(recBytes, startPos, recBytes.length, new byte[] {'|'}, s); |
| assertTrue(startPos == 12); |
| assertTrue(s.getValue().equals("fg")); |
| |
| // check delimiter in quotes |
| recBytes = "\"abcde|fg\"|\"hij|kl|mn|op\"|".getBytes(ConfigConstants.DEFAULT_CHARSET); |
| startPos = 0; |
| startPos = parser.parseField(recBytes, startPos, recBytes.length, new byte[] {'|'}, s); |
| assertTrue(startPos == 11); |
| assertTrue(s.getValue().equals("abcde|fg")); |
| |
| startPos = parser.parseField(recBytes, startPos, recBytes.length, new byte[] {'|'}, s); |
| assertTrue(startPos == 26); |
| assertTrue(s.getValue().equals("hij|kl|mn|op")); |
| |
| // check delimiter in quotes last field not terminated |
| recBytes = "\"abcde|fg\"|\"hij|kl|mn|op\"".getBytes(ConfigConstants.DEFAULT_CHARSET); |
| startPos = 0; |
| startPos = parser.parseField(recBytes, startPos, recBytes.length, new byte[] {'|'}, s); |
| assertTrue(startPos == 11); |
| assertTrue(s.getValue().equals("abcde|fg")); |
| |
| startPos = parser.parseField(recBytes, startPos, recBytes.length, new byte[] {'|'}, s); |
| assertTrue(startPos == 25); |
| assertTrue(s.getValue().equals("hij|kl|mn|op")); |
| } |
| |
| @Test |
| public void testParseValidMixedStrings() { |
| |
| this.parser = new StringValueParser(); |
| this.parser.enableQuotedStringParsing((byte)'@'); |
| |
| // check valid strings with out whitespaces and trailing delimiter |
| byte[] recBytes = "@abcde|gh@|@i@|jklmnopq|@rs@|tuv".getBytes(ConfigConstants.DEFAULT_CHARSET); |
| StringValue s = new StringValue(); |
| |
| int startPos = 0; |
| startPos = parser.parseField(recBytes, startPos, recBytes.length, new byte[] {'|'}, s); |
| assertTrue(startPos == 11); |
| assertTrue(s.getValue().equals("abcde|gh")); |
| |
| startPos = parser.parseField(recBytes, startPos, recBytes.length, new byte[] {'|'}, s); |
| assertTrue(startPos == 15); |
| assertTrue(s.getValue().equals("i")); |
| |
| startPos = parser.parseField(recBytes, startPos, recBytes.length, new byte[] {'|'}, s); |
| assertTrue(startPos == 24); |
| assertTrue(s.getValue().equals("jklmnopq")); |
| |
| startPos = parser.parseField(recBytes, startPos, recBytes.length, new byte[] {'|'}, s); |
| assertTrue(startPos == 29); |
| assertTrue(s.getValue().equals("rs")); |
| |
| startPos = parser.parseField(recBytes, startPos, recBytes.length, new byte[] {'|'}, s); |
| assertTrue(startPos == 32); |
| assertTrue(s.getValue().equals("tuv")); |
| |
| } |
| |
| |
| @Test |
| public void testParseInvalidQuotedStrings() { |
| |
| this.parser = new StringValueParser(); |
| this.parser.enableQuotedStringParsing((byte)'"'); |
| |
| // check valid strings with out whitespaces and trailing delimiter |
| byte[] recBytes = "\"abcdefgh\"-|\"jklmno ".getBytes(ConfigConstants.DEFAULT_CHARSET); |
| StringValue s = new StringValue(); |
| |
| int startPos = 0; |
| startPos = parser.parseField(recBytes, startPos, recBytes.length, new byte[] {'|'}, s); |
| assertTrue(startPos < 0); |
| |
| startPos = 12; |
| startPos = parser.parseField(recBytes, startPos, recBytes.length, new byte[] {'|'}, s); |
| assertTrue(startPos < 0); |
| } |
| |
| @Test |
| public void testParseValidMixedStringsWithCharset() { |
| |
| Charset charset = StandardCharsets.US_ASCII; |
| this.parser = new StringValueParser(); |
| this.parser.enableQuotedStringParsing((byte) '@'); |
| |
| // check valid strings with out whitespaces and trailing delimiter |
| byte[] recBytes = "@abcde|gh@|@i@|jklmnopq|@rs@|tuv".getBytes(ConfigConstants.DEFAULT_CHARSET); |
| StringValue s = new StringValue(); |
| |
| int startPos = 0; |
| parser.setCharset(charset); |
| startPos = parser.parseField(recBytes, startPos, recBytes.length, new byte[]{'|'}, s); |
| assertEquals(11, startPos); |
| assertEquals("abcde|gh", s.getValue()); |
| } |
| } |