flink-libraries/flink-table-common/src/main/java/org/apache/flink/table/dataformat/BinaryString.java - flink - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.	See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.	You may obtain a copy of the License at
  *
  *		http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 package org.apache.flink.table.dataformat;

 import org.apache.flink.api.common.typeinfo.TypeInfo;
 import org.apache.flink.core.memory.MemorySegment;
 import org.apache.flink.core.memory.MemorySegmentFactory;
 import org.apache.flink.table.dataformat.util.BinaryRowUtil;
 import org.apache.flink.table.dataformat.util.MultiSegUtil;
 import org.apache.flink.table.runtime.util.StringUtf8Utils;
 import org.apache.flink.table.typeutils.BinaryStringTypeFactory;
 import org.apache.flink.table.util.hash.Murmur32;

 import com.esotericsoftware.kryo.Kryo;
 import com.esotericsoftware.kryo.KryoSerializable;
 import com.esotericsoftware.kryo.io.Input;
 import com.esotericsoftware.kryo.io.Output;
 import org.apache.commons.codec.binary.Hex;

 import java.math.BigDecimal;
 import java.security.MessageDigest;
 import java.security.NoSuchAlgorithmException;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
 import java.util.stream.Collectors;
 import java.util.stream.Stream;

 import static org.apache.flink.util.Preconditions.checkArgument;

 /**
  * A utf8 string which is backed by {@link MemorySegment} instead of String. Its data may span
  * multiple {@link MemorySegment}s.
  *
  * <p>Used for internal table-level implementation. The built-in operator will use it for comparison,
  * search, and so on.
  *
  * <p>{@code BinaryString} is influenced by Apache Spark's UTF8String.
  */
 @TypeInfo(BinaryStringTypeFactory.class)
 public final class BinaryString implements Comparable<BinaryString>, Cloneable, KryoSerializable {

 	// TODO remove it for thread safe.
 	/** Shared empty-string instance; the static initializer below encodes it eagerly. */
 	public static final BinaryString EMPTY_UTF8 = BinaryString.fromString("");
 	static {
 		// Encode once at class-load time so the shared instance already has its segments set.
 		EMPTY_UTF8.ensureEncoded();
 	}

 	/** Reusable zero-length {@link BinaryString} array. */
 	static final BinaryString[] EMPTY_STRING_ARRAY = new BinaryString[0];

 	/** Segments holding the UTF-8 bytes; {@code null} while only the Java string is set. */
 	private MemorySegment[] segments;
 	/** Byte offset of the first byte, counted from the start of the first segment. */
 	private int offset;
 	/** Length of the string in bytes. */
 	private int numBytes;

 	/** Cache the java string for the binary string to avoid redundant decode. */
 	private String javaString;

 	/**
 	 * Creates an instance that points at nothing: no segments, no cached Java string, and
 	 * offset and length both set to -1.
 	 */
 	public BinaryString() {
 		this.segments = null;
 		this.offset = -1;
 		this.numBytes = -1;
 		this.javaString = null;
 	}

 	/**
 	 * Creates an instance backed only by the given Java string; no bytes are produced here.
 	 */
 	private BinaryString(String str) {
 		pointTo((MemorySegment[]) null, -1, -1, str);
 	}

 	/**
 	 * Creates an instance over the given segments without a cached Java string.
 	 */
 	private BinaryString(MemorySegment[] segments, int offset, int numBytes) {
 		pointTo(segments, offset, numBytes, null);
 	}

 	/**
 	 * Creates an instance over the given segments and stores the supplied Java string as the
 	 * cached decoded form.
 	 */
 	private BinaryString(MemorySegment[] segments, int offset, int numBytes, String javaString) {
 		this.segments = segments;
 		this.offset = offset;
 		this.numBytes = numBytes;
 		this.javaString = javaString;
 	}

 	/**
 	 * Re-points this instance at a byte array and clears the cached Java string.
 	 *
 	 * @param bytes the backing bytes
 	 * @param offset offset of the first byte
 	 * @param numBytes length in bytes
 	 */
 	public void pointTo(byte[] bytes, int offset, int numBytes) {
 		final String noCachedString = null;
 		pointTo(bytes, offset, numBytes, noCachedString);
 	}

 	/**
 	 * Re-points this instance at a byte array.
 	 *
 	 * <p>When this instance currently holds exactly one segment, that segment object is
 	 * re-pointed at {@code bytes}; otherwise a new single-element segment array wrapping
 	 * {@code bytes} is created.
 	 *
 	 * @param bytes the backing bytes
 	 * @param offset offset of the first byte
 	 * @param numBytes length in bytes
 	 * @param javaString Java string to store as the cached decoded form, may be null
 	 */
 	public void pointTo(byte[] bytes, int offset, int numBytes, String javaString) {
 		final MemorySegment[] target;
 		if (this.segments == null || this.segments.length != 1) {
 			target = new MemorySegment[] {MemorySegmentFactory.wrap(bytes)};
 		} else {
 			target = this.segments;
 			target[0].pointTo(bytes);
 		}
 		pointTo(target, offset, numBytes, javaString);
 	}

 	/**
 	 * Re-points this instance at the given segments and clears the cached Java string.
 	 *
 	 * @param segments the backing segments
 	 * @param offset offset of the first byte
 	 * @param numBytes length in bytes
 	 */
 	public void pointTo(MemorySegment[] segments, int offset, int numBytes) {
 		this.segments = segments;
 		this.offset = offset;
 		this.numBytes = numBytes;
 		this.javaString = null;
 	}

 	/**
 	 * Makes this instance hold only the given Java string: segments are dropped and offset and
 	 * length are reset to -1.
 	 */
 	private void pointToString(String javaString) {
 		this.segments = null;
 		this.offset = -1;
 		this.numBytes = -1;
 		this.javaString = javaString;
 	}

 	/**
 	 * Overwrites all four fields of this instance in one step.
 	 */
 	private void pointTo(MemorySegment[] segments, int offset, int numBytes, String javaString) {
 		// Cached string first, then the binary view; the assignments are independent.
 		this.javaString = javaString;
 		this.segments = segments;
 		this.numBytes = numBytes;
 		this.offset = offset;
 	}

 	/**
 	 * Creates a BinaryString that views {@code numBytes} bytes of the given segments, starting
 	 * at {@code offset}. No cached Java string is set.
 	 */
 	public static BinaryString fromAddress(
 			MemorySegment[] segments, int offset, int numBytes) {
 		return new BinaryString(segments, offset, numBytes, null);
 	}

 	/**
 	 * Wraps a Java string.
 	 *
 	 * @param str the string to wrap, may be null
 	 * @return a BinaryString holding {@code str}, or null when {@code str} is null
 	 */
 	public static BinaryString fromString(String str) {
 		return str == null ? null : fromNonNullString(str);
 	}

 	/**
 	 * Wraps a Java string without a null check.
 	 */
 	private static BinaryString fromNonNullString(String str) {
 		final BinaryString wrapped = new BinaryString(str);
 		return wrapped;
 	}

 	/**
 	 * Identity overload: returns the argument itself (no copy), including null.
 	 */
 	public static BinaryString fromString(BinaryString str) {
 		final BinaryString same = str;
 		return same;
 	}

 	/**
 	 * Converts an arbitrary object to a BinaryString.
 	 *
 	 * @param obj the value to convert, may be null
 	 * @return null for null, the object itself when it already is a BinaryString, otherwise a
 	 *     BinaryString wrapping the string (or {@code obj.toString()})
 	 */
 	public static BinaryString fromString(Object obj) {
 		if (obj == null) {
 			return null;
 		}
 		if (obj instanceof BinaryString) {
 			return (BinaryString) obj;
 		}
 		final String text = (obj instanceof String) ? (String) obj : obj.toString();
 		return fromNonNullString(text);
 	}

 	/**
 	 * Wraps a whole byte array.
 	 *
 	 * @param bytes the bytes to wrap, may be null
 	 * @return a BinaryString over all of {@code bytes}, or null when {@code bytes} is null
 	 */
 	public static BinaryString fromBytes(byte[] bytes) {
 		if (bytes == null) {
 			return null;
 		}
 		return fromBytes(bytes, 0, bytes.length);
 	}

 	/**
 	 * Wraps a range of a byte array without a cached Java string.
 	 *
 	 * @param bytes the backing bytes
 	 * @param offset offset of the first byte
 	 * @param numBytes length in bytes
 	 */
 	public static BinaryString fromBytes(byte[] bytes, int offset, int numBytes) {
 		final MemorySegment[] wrapped = new MemorySegment[]{MemorySegmentFactory.wrap(bytes)};
 		return new BinaryString(wrapped, offset, numBytes, null);
 	}

 	/**
 	 * Wraps a range of a byte array and stores {@code javaString} as the cached decoded form.
 	 *
 	 * @param bytes the backing bytes
 	 * @param offset offset of the first byte
 	 * @param numBytes length in bytes
 	 * @param javaString cached Java string, may be null
 	 */
 	public static BinaryString fromBytes(byte[] bytes, int offset, int numBytes, String javaString) {
 		final MemorySegment[] wrapped = new MemorySegment[]{MemorySegmentFactory.wrap(bytes)};
 		return new BinaryString(wrapped, offset, numBytes, javaString);
 	}

 	/**
 	 * Creates a BinaryString made of {@code length} space characters.
 	 */
 	public static BinaryString blankString(int length) {
 		final byte[] blanks = new byte[length];
 		for (int i = 0; i < length; i++) {
 			blanks[i] = (byte) ' ';
 		}
 		return fromBytes(blanks);
 	}

 	/**
 	 * Returns how many bytes the code point starting with lead byte {@code b} occupies.
 	 * A byte that is not an accepted lead byte counts as 1 so the caller skips over it.
 	 *
 	 * @param b The first byte of a code point
 	 */
 	private static int numBytesForFirstByte(final byte b) {
 		final int lead = b & 0xFF;
 		if (lead < 0x80) {
 			// 0xxxxxxx: 1 byte, 7 bits
 			return 1;
 		}
 		if ((lead & 0xE0) == 0xC0 && (lead & 0x1E) != 0) {
 			// 110xxxxx 10xxxxxx: 2 bytes, 11 bits
 			return 2;
 		}
 		if ((lead & 0xF0) == 0xE0) {
 			// 1110xxxx 10xxxxxx 10xxxxxx: 3 bytes, 16 bits
 			return 3;
 		}
 		if ((lead & 0xF8) == 0xF0) {
 			// 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx: 4 bytes, 21 bits
 			return 4;
 		}
 		// Not a valid UTF-8 first byte: skip it as a single byte instead of throwing.
 		return 1;
 	}

 	/**
 	 * Returns whether this string is exactly one space character ({@code " "}).
 	 *
 	 * <p>Both representations follow the same rule: with a cached Java string the whole string
 	 * must equal {@code " "}; without one, the binary form must be exactly one byte long and
 	 * that byte must be 0x20.
 	 *
 	 * @return true only for the single-space string
 	 */
 	public boolean isSpaceString() {
 		if (javaString != null) {
 			return javaString.equals(" ");
 		} else {
 			// Check the length before looking at byte 0: a longer string that merely starts
 			// with a space is not the space string, and an empty string has no byte 0 to read.
 			return numBytes == 1 && getByte(0) == ' ';
 		}
 	}

 	/**
 	 * Makes sure the binary form exists, encoding the cached Java string when no segments are
 	 * set yet.
 	 */
 	public void ensureEncoded() {
 		if (segments == null) {
 			encodeToBytes();
 		}
 	}

 	/**
 	 * Encodes the cached Java string to UTF-8 and points this instance at the result, keeping
 	 * the Java string cached. Does nothing when there is no Java string.
 	 */
 	private void encodeToBytes() {
 		if (javaString == null) {
 			return;
 		}
 		final byte[] utf8 = StringUtf8Utils.encodeUTF8(javaString);
 		pointTo(utf8, 0, utf8.length, javaString);
 	}

 	/**
 	 * Returns the byte offset of this string, encoding first if necessary.
 	 */
 	public int getOffset() {
 		ensureEncoded();
 		return this.offset;
 	}

 	/**
 	 * Returns the backing segment array itself (not a copy), encoding first if necessary.
 	 */
 	public MemorySegment[] getSegments() {
 		ensureEncoded();
 		return this.segments;
 	}

 	/**
 	 * Returns the length of this string in bytes, encoding first if necessary.
 	 */
 	public int numBytes() {
 		ensureEncoded();
 		return this.numBytes;
 	}

 	/**
 	 * Returns the number of code points, found by hopping from lead byte to lead byte.
 	 */
 	public int numChars() {
 		ensureEncoded();
 		if (!inOneSeg()) {
 			return numCharsSlow();
 		}
 		int count = 0;
 		int pos = 0;
 		while (pos < numBytes) {
 			pos += numBytesForFirstByte(getByteOneSeg(pos));
 			count++;
 		}
 		return count;
 	}

 	/**
 	 * Counts code points when the data is not contained in the first segment, moving a
 	 * segment cursor forward one character at a time.
 	 */
 	private int numCharsSlow() {
 		final int segSize = segments[0].size();
 		final SegmentAndOffset cursor = firstSegmentAndOffset(segSize);
 		int count = 0;
 		for (int consumed = 0; consumed < numBytes; count++) {
 			final int width = numBytesForFirstByte(cursor.value());
 			consumed += width;
 			cursor.skipBytes(width, segSize);
 		}
 		return count;
 	}

 	/**
 	 * Returns the byte at position {@code i} of this string, encoding first if necessary.
 	 * The segment is located by dividing the absolute offset by the size of the first segment.
 	 */
 	public byte getByte(int i) {
 		ensureEncoded();
 		final int absolute = offset + i;
 		final int segSize = segments[0].size();
 		if (absolute >= segSize) {
 			return segments[absolute / segSize].get(absolute % segSize);
 		}
 		return segments[0].get(absolute);
 	}

 	/**
 	 * Reads byte {@code i} of this string directly from the first segment.
 	 */
 	private byte getByteOneSeg(int i) {
 		final MemorySegment first = segments[0];
 		return first.get(offset + i);
 	}

 	/**
 	 * Compares by content. When both sides have a cached Java string those are compared;
 	 * otherwise both sides are encoded and their bytes compared.
 	 */
 	@Override
 	public boolean equals(final Object o) {
 		if (!(o instanceof BinaryString)) {
 			return false;
 		}
 		final BinaryString that = (BinaryString) o;
 		if (javaString != null && that.javaString != null) {
 			return javaString.equals(that.javaString);
 		}

 		ensureEncoded();
 		that.ensureEncoded();
 		if (numBytes != that.numBytes) {
 			return false;
 		}
 		return BinaryRowUtil.equals(segments, offset, that.segments, that.offset, numBytes);
 	}

 	/**
 	 * Orders strings by their UTF-8 bytes, each byte compared as an unsigned value; when one
 	 * string is a prefix of the other the shorter one sorts first.
 	 *
 	 * <p>The comparison is always done on the encoded bytes, even when both sides have a
 	 * cached Java string. {@link String#compareTo(String)} orders by UTF-16 code units, which
 	 * disagrees with UTF-8 byte order for supplementary characters versus U+E000..U+FFFF, so
 	 * using it would make the result depend on which representation happens to be cached.
 	 *
 	 * @param other the string to compare with, must not be null
 	 * @return a negative, zero or positive value as this string is less than, equal to or
 	 *     greater than {@code other}
 	 */
 	@Override
 	public int compareTo(BinaryString other) {

 		ensureEncoded();
 		other.ensureEncoded();
 		if (segments.length == 1 && other.segments.length == 1) {

 			int len = Math.min(numBytes, other.numBytes);
 			MemorySegment seg1 = segments[0];
 			MemorySegment seg2 = other.segments[0];

 			for (int i = 0; i < len; i++) {
 				// We can use MemorySegment.compare.
 				// But need careful about inline.
 				// Mask with 0xFF so bytes >= 0x80 compare as unsigned values.
 				int res = (seg1.get(offset + i) & 0xFF) - (seg2.get(other.offset + i) & 0xFF);
 				if (res != 0) {
 					return res;
 				}
 			}
 			// Common prefix is equal: the shorter string sorts first.
 			return numBytes - other.numBytes;
 		}

 		// if there are multi segments.
 		return compareComplex(other);
 	}

 	/**
 	 * Find the boundaries of segments, and then compare MemorySegment.
 	 *
 	 * <p>Compares the two strings byte by byte (unsigned) in runs that stay inside one segment
 	 * on each side, advancing to the next segment of whichever side is exhausted first. If the
 	 * common prefix is equal, the difference of the byte lengths is returned.
 	 *
 	 * @param other the string to compare with; callers encode both sides before calling
 	 * @return negative, zero or positive, like {@link #compareTo(BinaryString)}
 	 */
 	private int compareComplex(BinaryString other) {

 		// An empty side cannot be walked; the length difference alone decides.
 		if (numBytes == 0 || other.numBytes == 0) {
 			return numBytes - other.numBytes;
 		}

 		// Number of bytes still to compare (the common prefix length).
 		int len = Math.min(numBytes, other.numBytes);

 		MemorySegment seg1 = segments[0];
 		MemorySegment seg2 = other.segments[0];

 		// Each side uses the size of its own first segment as the segment size.
 		int segmentSize = segments[0].size();
 		int otherSegmentSize = other.segments[0].size();

 		// Bytes available in the current segment from the start offset; <= 0 means the
 		// offset lies beyond this segment.
 		int sizeOfFirst1 = segmentSize - offset;
 		int sizeOfFirst2 = otherSegmentSize - other.offset;

 		// Index of the next segment to move to on each side.
 		int varSegIndex1 = 1;
 		int varSegIndex2 = 1;

 		// find the first segment of this string.
 		while (sizeOfFirst1 <= 0) {
 			sizeOfFirst1 += segmentSize;
 			seg1 = segments[varSegIndex1++];
 		}

 		// find the first segment of the other string.
 		while (sizeOfFirst2 <= 0) {
 			sizeOfFirst2 += otherSegmentSize;
 			seg2 = other.segments[varSegIndex2++];
 		}

 		// Offsets inside the segments the strings actually start in.
 		int offset1 = segmentSize - sizeOfFirst1;
 		int offset2 = otherSegmentSize - sizeOfFirst2;

 		// Length of the next run: limited by both current segments and the remaining bytes.
 		int needCompare = Math.min(Math.min(sizeOfFirst1, sizeOfFirst2), len);

 		while (needCompare > 0) {
 			// compare in one segment.
 			for (int i = 0; i < needCompare; i++) {
 				int res = (seg1.get(offset1 + i) & 0xFF) - (seg2.get(offset2 + i) & 0xFF);
 				if (res != 0) {
 					return res;
 				}
 			}
 			// This run covered everything that was left to compare.
 			if (needCompare == len) {
 				break;
 			}
 			len -= needCompare;
 			// next segment
 			if (sizeOfFirst1 < sizeOfFirst2) { // this side's segment ran out first
 				seg1 = segments[varSegIndex1++];
 				offset1 = 0;
 				offset2 += needCompare;
 				sizeOfFirst1 = segmentSize;
 				sizeOfFirst2 -= needCompare;
 			} else if (sizeOfFirst1 > sizeOfFirst2) { // the other side's segment ran out first
 				seg2 = other.segments[varSegIndex2++];
 				offset2 = 0;
 				offset1 += needCompare;
 				sizeOfFirst2 = otherSegmentSize;
 				sizeOfFirst1 -= needCompare;
 			} else { // same, should go ahead both.
 				seg1 = segments[varSegIndex1++];
 				seg2 = other.segments[varSegIndex2++];
 				offset1 = 0;
 				offset2 = 0;
 				sizeOfFirst1 = segmentSize;
 				sizeOfFirst2 = otherSegmentSize;
 			}
 			needCompare = Math.min(Math.min(sizeOfFirst1, sizeOfFirst2), len);
 		}

 		// Sanity check: the loop must have ended on the run that consumed the remainder.
 		checkArgument(needCompare == len);

 		// Common prefix is equal: the shorter string sorts first.
 		return numBytes - other.numBytes;
 	}

 	/**
 	 * Returns the Java string, decoding the UTF-8 bytes on first use and caching the result.
 	 */
 	@Override
 	public String toString() {
 		if (javaString == null) {
 			final String decoded;
 			if (segments.length != 1) {
 				// Data may span segments: gather it into one array before decoding.
 				final byte[] flat = StringUtf8Utils.allocateBytes(numBytes);
 				copyTo(flat);
 				decoded = StringUtf8Utils.decodeUTF8(flat, 0, numBytes);
 			} else {
 				decoded = StringUtf8Utils.decodeUTF8(segments[0], offset, numBytes);
 			}
 			this.javaString = decoded;
 		}
 		return javaString;
 	}

 	/**
 	 * Returns the bytes of this string via {@code MultiSegUtil.getBytes}, encoding first if
 	 * necessary. The result may not be a copy; use {@link #copyTo(byte[])} when a copy is
 	 * required.
 	 */
 	public byte[] getBytes() {
 		ensureEncoded();
 		final byte[] bytes = MultiSegUtil.getBytes(segments, offset, numBytes);
 		return bytes;
 	}

 	/**
 	 * Murmur32 hash (seed 42) of the encoded bytes.
 	 */
 	@Override
 	public int hashCode() {
 		ensureEncoded();
 		return segments.length == 1
 				? Murmur32.hashBytes(segments[0], offset, numBytes, 42)
 				: hashSlow();
 	}

 	/**
 	 * Hash for the multi-segment case: wraps the bytes from {@link #getBytes()} in a single
 	 * segment and hashes that.
 	 */
 	private int hashSlow() {
 		final MemorySegment flat = MemorySegmentFactory.wrap(getBytes());
 		return Murmur32.hashBytes(flat, 0, numBytes, 42);
 	}

 	/**
 	 * 64-bit hash (seed 42) of the encoded bytes, computed by {@code Murmur32.hash64}.
 	 */
 	public long hash64() {
 		ensureEncoded();
 		return segments.length == 1
 				? Murmur32.hash64(segments[0], offset, numBytes, 42)
 				: hash64Slow();
 	}

 	/**
 	 * 64-bit hash for the multi-segment case: wraps the bytes from {@link #getBytes()} in a
 	 * single segment and hashes that.
 	 */
 	private long hash64Slow() {
 		final MemorySegment flat = MemorySegmentFactory.wrap(getBytes());
 		return Murmur32.hash64(flat, 0, numBytes, 42);
 	}

 	/**
 	 * Returns a new BinaryString with the same content. When encoded, the bytes are copied
 	 * into a fresh array; the cached Java string is carried over either way.
 	 */
 	public BinaryString copy() {
 		if (segments != null) {
 			final byte[] bytes = BinaryRowUtil.copy(segments, offset, numBytes);
 			return BinaryString.fromBytes(bytes, 0, bytes.length, javaString);
 		}
 		return new BinaryString(javaString);
 	}

 	/**
 	 * Copies the content of this string into {@code reuse} and returns {@code reuse}.
 	 *
 	 * @param reuse the instance to overwrite
 	 * @return {@code reuse}
 	 */
 	public BinaryString copy(BinaryString reuse) {
 		if (segments != null) {
 			final byte[] bytes = BinaryRowUtil.copy(segments, offset, numBytes);
 			reuse.pointTo(bytes, 0, bytes.length, javaString);
 		} else {
 			reuse.pointToString(javaString);
 		}
 		return reuse;
 	}

 	/**
 	 * Returns a new BinaryString whose segments are obtained by calling
 	 * {@code cloneReference()} on each of this string's segments; offset, length and the cached
 	 * Java string are carried over.
 	 */
 	public BinaryString cloneReference() {
 		if (segments == null) {
 			return new BinaryString(javaString);
 		}
 		final MemorySegment[] refs = new MemorySegment[segments.length];
 		int next = 0;
 		for (MemorySegment segment : segments) {
 			refs[next++] = segment.cloneReference();
 		}
 		return new BinaryString(refs, offset, numBytes, javaString);
 	}

 	/**
 	 * Returns whether the binary form is present, i.e. the segments are set.
 	 */
 	public boolean isEncoded() {
 		final boolean hasSegments = segments != null;
 		return hasSegments;
 	}

 	/**
 	 * Copies the bytes of this string into {@code bytes}, starting at index 0.
 	 *
 	 * @param bytes destination array
 	 */
 	public void copyTo(byte[] bytes) {
 		ensureEncoded();
 		final int targetStart = 0;
 		BinaryRowUtil.copy(segments, offset, bytes, targetStart, numBytes);
 	}

 	/**
 	 * Kryo serialization: writes the byte length as an int followed by the bytes.
 	 * Range partition use Kryo to emit local sample data.
 	 */
 	@Override
 	public void write(Kryo kryo, Output output) {
 		ensureEncoded();
 		final byte[] payload = BinaryRowUtil.copy(segments, offset, numBytes);
 		output.writeInt(numBytes);
 		output.writeBytes(payload);
 	}

 	/**
 	 * Kryo deserialization: reads the int length, then that many bytes, and points this
 	 * instance at them.
 	 */
 	@Override
 	public void read(Kryo kryo, Input input) {
 		final int length = input.readInt();
 		final byte[] payload = input.readBytes(length);
 		pointTo(payload, 0, length);
 	}

 	/**
 	 * Null-tolerant {@code toString}.
 	 *
 	 * @return null when {@code str} is null, otherwise {@code str.toString()}
 	 */
 	public static String safeToString(BinaryString str) {
 		return str == null ? null : str.toString();
 	}

 	/**
 	 * Returns whether all bytes of this string lie inside the first segment.
 	 */
 	private boolean inOneSeg() {
 		final int endExclusive = numBytes + offset;
 		return endExclusive <= segments[0].size();
 	}

 	/**
 	 * SQL-style substring from {@code pos} to the end of the string; delegates with
 	 * {@code Integer.MAX_VALUE} as the length.
 	 */
 	public BinaryString substringSQL(int pos) {
 		final int unbounded = Integer.MAX_VALUE;
 		return substringSQL(pos, unbounded);
 	}

 	/**
 	 * SQL-style substring counted in characters.
 	 *
 	 * @param pos 1-based start position; 0 is treated like 1, a negative value counts from the
 	 *     end of the string
 	 * @param length maximum number of characters to return
 	 * @return null when {@code length} is negative; the empty string when this string is empty
 	 *     or the start falls outside it; otherwise the selected characters
 	 */
 	public BinaryString substringSQL(int pos, int length) {
 		if (length < 0) {
 			return null;
 		}
 		ensureEncoded();
 		if (equals(EMPTY_UTF8)) {
 			return EMPTY_UTF8;
 		}

 		final int charCount = numChars();
 		int start = 0;
 		if (pos > 0) {
 			start = pos - 1;
 			if (start >= charCount) {
 				return EMPTY_UTF8;
 			}
 		} else if (pos < 0) {
 			start = charCount + pos;
 			if (start < 0) {
 				return EMPTY_UTF8;
 			}
 		}

 		// Clamp the end to the string when fewer than `length` characters remain.
 		final int end = (charCount - start) < length ? charCount : start + length;
 		return substring(start, end);
 	}

 	/**
 	 * Returns the characters in the range {@code [start, until)}, counted in code points.
 	 * @param start the position of first code point
 	 * @param until the position after last code point, exclusive.
 	 */
 	public BinaryString substring(final int start, final int until) {
 		ensureEncoded();
 		if (until <= start || start >= numBytes()) {
 			return EMPTY_UTF8;
 		}
 		if (!inOneSeg()) {
 			return substringSlow(start, until);
 		}

 		final MemorySegment seg = segments[0];
 		int bytePos = 0;
 		int charPos = 0;
 		// Skip the first `start` characters.
 		for (; bytePos < numBytes && charPos < start; charPos++) {
 			bytePos += numBytesForFirstByte(seg.get(bytePos + offset));
 		}

 		final int from = bytePos;
 		// Walk the characters that belong to the result.
 		for (; bytePos < numBytes && charPos < until; charPos++) {
 			bytePos += numBytesForFirstByte(seg.get(bytePos + offset));
 		}

 		final int length = bytePos - from;
 		if (length <= 0) {
 			return EMPTY_UTF8;
 		}
 		final byte[] out = new byte[length];
 		seg.get(offset + from, out, 0, length);
 		return fromBytes(out);
 	}

 	/**
 	 * Multi-segment variant of {@link #substring(int, int)}: walks a segment cursor to find the
 	 * byte range, then copies it out.
 	 */
 	private BinaryString substringSlow(final int start, final int until) {
 		final int segSize = segments[0].size();
 		final SegmentAndOffset cursor = firstSegmentAndOffset(segSize);
 		int bytePos = 0;
 		int charPos = 0;
 		// Skip the first `start` characters.
 		for (; bytePos < numBytes && charPos < start; charPos++) {
 			final int width = numBytesForFirstByte(cursor.value());
 			bytePos += width;
 			cursor.skipBytes(width, segSize);
 		}

 		final int from = bytePos;
 		// Walk the characters that belong to the result.
 		for (; bytePos < numBytes && charPos < until; charPos++) {
 			final int width = numBytesForFirstByte(cursor.value());
 			bytePos += width;
 			cursor.skipBytes(width, segSize);
 		}

 		if (bytePos <= from) {
 			return EMPTY_UTF8;
 		}
 		return fromBytes(BinaryRowUtil.copy(segments, offset + from, bytePos - from));
 	}

 	/**
 	 * Joins the given strings, in order, into one string. Varargs form of
 	 * {@link #concat(Iterable)}.
 	 */
 	public static BinaryString concat(BinaryString... inputs) {
 		final List<BinaryString> asList = Arrays.asList(inputs);
 		return concat(asList);
 	}

 	/**
 	 * Joins the given strings, in order, into one string. Null elements are skipped.
 	 */
 	public static BinaryString concat(Iterable<BinaryString> inputs) {
 		// First pass: encode every input and add up the byte lengths.
 		int total = 0;
 		for (BinaryString part : inputs) {
 			if (part == null) {
 				continue;
 			}
 			// numBytes() encodes the input if that has not happened yet.
 			total += part.numBytes();
 		}

 		// Second pass: copy each input behind the previous one.
 		final byte[] joined = new byte[total];
 		int writePos = 0;
 		for (BinaryString part : inputs) {
 			if (part == null) {
 				continue;
 			}
 			final int partLen = part.numBytes;
 			BinaryRowUtil.copy(part.segments, part.offset, joined, writePos, partLen);
 			writePos += partLen;
 		}
 		return fromBytes(joined);
 	}

 	/**
 	 * Joins the given strings with {@code separator} between them. Varargs form of
 	 * {@link #concatWs(BinaryString, Iterable)}.
 	 * A null input is skipped. For example, concatWs(",", "a", null, "c") would yield "a,c".
 	 */
 	public static BinaryString concatWs(BinaryString separator, BinaryString... inputs) {
 		final List<BinaryString> asList = Arrays.asList(inputs);
 		return concatWs(separator, asList);
 	}

 	/**
 	 * Joins the given strings with {@code separator} between them.
 	 * A null input is skipped. For example, concatWs(",", "a", null, "c") would yield "a,c".
 	 * A null or empty separator makes this behave like {@link #concat(Iterable)}.
 	 */
 	public static BinaryString concatWs(BinaryString separator, Iterable<BinaryString> inputs) {
 		if (null == separator || EMPTY_UTF8.equals(separator)) {
 			return concat(inputs);
 		}
 		separator.ensureEncoded();

 		// First pass: encode the non-null inputs, count them and add up their bytes.
 		int payloadBytes = 0;
 		int nonNullCount = 0;
 		for (BinaryString part : inputs) {
 			if (part == null) {
 				continue;
 			}
 			part.ensureEncoded();
 			payloadBytes += part.numBytes;
 			nonNullCount++;
 		}

 		if (nonNullCount == 0) {
 			// Return an empty string if there is no input, or all the inputs are null.
 			return EMPTY_UTF8;
 		}

 		// All inputs plus one separator between each neighbouring pair.
 		final int sepLen = separator.numBytes;
 		final byte[] joined = new byte[payloadBytes + (nonNullCount - 1) * sepLen];
 		int writePos = 0;
 		int written = 0;
 		for (BinaryString part : inputs) {
 			if (part == null) {
 				continue;
 			}
 			final int partLen = part.numBytes;
 			BinaryRowUtil.copy(part.segments, part.offset, joined, writePos, partLen);
 			writePos += partLen;

 			written++;
 			// Add separator if this is not the last input.
 			if (written < nonNullCount) {
 				BinaryRowUtil.copy(separator.segments, separator.offset, joined, writePos, separator.numBytes);
 				writePos += separator.numBytes;
 			}
 		}
 		return fromBytes(joined);
 	}

 	/**
 	 * Returns whether the bytes of {@code substring} occur anywhere in this string.
 	 * Same to like '%substring%'. An empty {@code substring} always matches.
 	 */
 	public boolean contains(final BinaryString substring) {
 		ensureEncoded();
 		substring.ensureEncoded();
 		if (substring.numBytes == 0) {
 			return true;
 		}
 		final int foundAt = BinaryRowUtil.find(
 				segments, offset, numBytes,
 				substring.segments, substring.offset, substring.numBytes);
 		return foundAt != -1;
 	}

 	/**
 	 * Returns whether {@code s} occurs in this string at byte position {@code pos}, choosing
 	 * the single-segment comparison when both strings fit in their first segment.
 	 */
 	private boolean matchAt(final BinaryString s, int pos) {
 		if (inOneSeg() && s.inOneSeg()) {
 			return matchAtOneSeg(s, pos);
 		}
 		return matchAtVarSeg(s, pos);
 	}

 	/**
 	 * Single-segment match of {@code s} at byte position {@code pos}; false when {@code s}
 	 * would not fit at that position.
 	 */
 	private boolean matchAtOneSeg(final BinaryString s, int pos) {
 		if (s.numBytes + pos > numBytes || pos < 0) {
 			return false;
 		}
 		return segments[0].equalTo(s.segments[0], offset + pos, s.offset, s.numBytes);
 	}

 	/**
 	 * Multi-segment match of {@code s} at byte position {@code pos}; false when {@code s}
 	 * would not fit at that position.
 	 */
 	private boolean matchAtVarSeg(final BinaryString s, int pos) {
 		if (s.numBytes + pos > numBytes || pos < 0) {
 			return false;
 		}
 		return BinaryRowUtil.equalsSlow(segments, offset + pos, s.segments, s.offset, s.numBytes);
 	}

 	/**
 	 * Returns whether this string begins with the bytes of {@code prefix}.
 	 * Same to like 'prefix%'.
 	 */
 	public boolean startsWith(final BinaryString prefix) {
 		ensureEncoded();
 		prefix.ensureEncoded();
 		final int atStart = 0;
 		return matchAt(prefix, atStart);
 	}

 	/**
 	 * Returns whether this string ends with the bytes of {@code suffix}.
 	 * Same to like '%suffix'.
 	 */
 	public boolean endsWith(final BinaryString suffix) {
 		ensureEncoded();
 		suffix.ensureEncoded();
 		// Position at which the suffix would have to start.
 		final int tailStart = numBytes - suffix.numBytes;
 		return matchAt(suffix, tailStart);
 	}

 	/**
 	 * Copies bytes {@code start..end} (both inclusive) out of the first segment into a new
 	 * BinaryString.
 	 */
 	private BinaryString copyBinaryStringInOneSeg(int start, int end) {
 		final int count = end - start + 1;
 		final byte[] slice = new byte[count];
 		segments[0].get(offset + start, slice, 0, count);
 		return fromBytes(slice);
 	}

 	/**
 	 * Copies bytes {@code start..end} (both inclusive) into a new BinaryString, using the
 	 * multi-segment copy.
 	 */
 	private BinaryString copyBinaryString(int start, int end) {
 		final int count = end - start + 1;
 		final byte[] slice = new byte[count];
 		BinaryRowUtil.copy(segments, offset + start, slice, 0, count);
 		return fromBytes(slice);
 	}

 	/**
 	 * Returns this string with leading and trailing spaces (0x20) removed.
 	 */
 	public BinaryString trim() {
 		ensureEncoded();
 		if (!inOneSeg()) {
 			return trimSlow();
 		}
 		// Move past the spaces on the left.
 		int left = 0;
 		while (left < this.numBytes && getByteOneSeg(left) == 0x20) {
 			left++;
 		}
 		// Move back over the spaces on the right.
 		int right = this.numBytes - 1;
 		while (right >= left && getByteOneSeg(right) == 0x20) {
 			right--;
 		}
 		// left > right means nothing but spaces.
 		return left > right ? EMPTY_UTF8 : copyBinaryStringInOneSeg(left, right);
 	}

 	/**
 	 * Multi-segment variant of {@link #trim()}, using one cursor from the front and one from
 	 * the back.
 	 */
 	private BinaryString trimSlow() {
 		final int segSize = segments[0].size();

 		// Move past the spaces on the left.
 		int left = 0;
 		final SegmentAndOffset head = firstSegmentAndOffset(segSize);
 		for (; left < this.numBytes && head.value() == 0x20; left++) {
 			head.nextByte(segSize);
 		}

 		// Move back over the spaces on the right.
 		int right = this.numBytes - 1;
 		final SegmentAndOffset tail = lastSegmentAndOffset(segSize);
 		for (; right >= left && tail.value() == 0x20; right--) {
 			tail.previousByte(segSize);
 		}

 		// left > right means nothing but spaces.
 		return left > right ? EMPTY_UTF8 : copyBinaryString(left, right);
 	}

 	/**
 	 * Removes, from both ends of this string, every character that occurs in
 	 * {@code trimStr}; the left side is trimmed first, then the right side of that result.
 	 *
 	 * @param trimStr the trim string
 	 * @return the trimmed string, or null when {@code trimStr} is null
 	 */
 	public BinaryString trim(BinaryString trimStr) {
 		if (trimStr == null) {
 			return null;
 		}
 		final BinaryString leftTrimmed = trimLeft(trimStr);
 		return leftTrimmed.trimRight(trimStr);
 	}

 	/**
 	 * Returns this string with leading spaces (0x20) removed.
 	 */
 	public BinaryString trimLeft() {
 		ensureEncoded();
 		if (!inOneSeg()) {
 			return trimLeftSlow();
 		}
 		// Move past the spaces on the left.
 		int left = 0;
 		while (left < this.numBytes && getByteOneSeg(left) == 0x20) {
 			left++;
 		}
 		// Reaching the end means nothing but spaces.
 		return left == this.numBytes
 				? EMPTY_UTF8
 				: copyBinaryStringInOneSeg(left, this.numBytes - 1);
 	}

 	/**
 	 * Multi-segment variant of {@link #trimLeft()}.
 	 */
 	private BinaryString trimLeftSlow() {
 		final int segSize = segments[0].size();
 		final SegmentAndOffset head = firstSegmentAndOffset(segSize);
 		// Move past the spaces on the left.
 		int left = 0;
 		for (; left < this.numBytes && head.value() == 0x20; left++) {
 			head.nextByte(segSize);
 		}
 		// Reaching the end means nothing but spaces.
 		return left == this.numBytes
 				? EMPTY_UTF8
 				: copyBinaryString(left, this.numBytes - 1);
 	}

 	/**
 	 * Removes characters from the left end of this string for as long as each one occurs in
 	 * {@code trimStr}, stopping at the first character that does not.
 	 *
 	 * @param trimStr the trim string
 	 * @return the remaining string, or null when {@code trimStr} is null
 	 */
 	public BinaryString trimLeft(BinaryString trimStr) {
 		ensureEncoded();
 		if (trimStr == null) {
 			return null;
 		}
 		trimStr.ensureEncoded();
 		if (trimStr.isSpaceString()) {
 			return trimLeft();
 		}
 		if (!inOneSeg()) {
 			return trimLeftSlow(trimStr);
 		}

 		int keepFrom = 0;
 		while (keepFrom < this.numBytes) {
 			final int width = numBytesForFirstByte(getByteOneSeg(keepFrom));
 			final BinaryString ch = copyBinaryStringInOneSeg(keepFrom, keepFrom + width - 1);
 			// Stop at the first character that is not part of the trim string.
 			if (!trimStr.contains(ch)) {
 				break;
 			}
 			keepFrom += width;
 		}
 		// Everything was trimmed away.
 		if (keepFrom >= numBytes) {
 			return EMPTY_UTF8;
 		}
 		return copyBinaryStringInOneSeg(keepFrom, numBytes - 1);
 	}

 	/**
 	 * Multi-segment variant of {@link #trimLeft(BinaryString)}: removes characters from the
 	 * left end for as long as each one occurs in {@code trimStr}.
 	 *
 	 * @param trimStr the trim string, already encoded by the caller
 	 * @return the remaining string, or the empty string when everything was trimmed
 	 */
 	private BinaryString trimLeftSlow(BinaryString trimStr) {
 		int searchIdx = 0;
 		int segSize = segments[0].size();
 		SegmentAndOffset front = firstSegmentAndOffset(segSize);
 		while (searchIdx < this.numBytes) {
 			int charBytes = numBytesForFirstByte(front.value());
 			BinaryString currentChar = copyBinaryString(searchIdx, searchIdx + charBytes - 1);
 			if (trimStr.contains(currentChar)) {
 				searchIdx += charBytes;
 				front.skipBytes(charBytes, segSize);
 			} else {
 				break;
 			}
 		}
 		// Use >= like the single-segment path: a trailing lead byte that announces more bytes
 		// than remain can push searchIdx past numBytes, and the copy below would then be
 		// asked for a negative length.
 		if (searchIdx >= this.numBytes) {
 			// empty string
 			return EMPTY_UTF8;
 		} else {
 			return copyBinaryString(searchIdx, this.numBytes - 1);
 		}
 	}

 	/**
 	 * Returns this string with trailing spaces (0x20) removed.
 	 */
 	public BinaryString trimRight() {
 		ensureEncoded();
 		if (!inOneSeg()) {
 			return trimRightSlow();
 		}
 		// Move back over the spaces on the right.
 		int right = numBytes - 1;
 		while (right >= 0 && getByteOneSeg(right) == 0x20) {
 			right--;
 		}
 		// Running off the front means nothing but spaces.
 		return right < 0 ? EMPTY_UTF8 : copyBinaryStringInOneSeg(0, right);
 	}

 	/**
 	 * Multi-segment variant of {@link #trimRight()}.
 	 */
 	private BinaryString trimRightSlow() {
 		final int segSize = segments[0].size();
 		final SegmentAndOffset tail = lastSegmentAndOffset(segSize);
 		// Move back over the spaces on the right.
 		int right = numBytes - 1;
 		for (; right >= 0 && tail.value() == 0x20; right--) {
 			tail.previousByte(segSize);
 		}
 		// Running off the front means nothing but spaces.
 		return right < 0 ? EMPTY_UTF8 : copyBinaryString(0, right);
 	}

 	/**
 	 * Removes characters from the right end of this string for as long as each one occurs in
 	 * {@code trimStr}, stopping at the first character that does not.
 	 *
 	 * @param trimStr the trim string
 	 * @return the remaining string, or null when {@code trimStr} is null
 	 */
 	public BinaryString trimRight(BinaryString trimStr) {
 		ensureEncoded();
 		if (trimStr == null) {
 			return null;
 		}
 		trimStr.ensureEncoded();
 		if (trimStr.isSpaceString()) {
 			return trimRight();
 		}
 		if (!inOneSeg()) {
 			return trimRightSlow(trimStr);
 		}

 		// Index every character first: byte width and start position of each one.
 		final int[] widths = new int[numBytes];
 		final int[] starts = new int[numBytes];
 		int charCount = 0;
 		for (int pos = 0; pos < numBytes; charCount++) {
 			starts[charCount] = pos;
 			widths[charCount] = numBytesForFirstByte(getByteOneSeg(pos));
 			pos += widths[charCount];
 		}

 		// lastKept is the index of the last byte that stays; walk the characters backwards.
 		int lastKept = numBytes - 1;
 		for (int c = charCount - 1; c >= 0; c--) {
 			final BinaryString ch = copyBinaryStringInOneSeg(
 					starts[c],
 					starts[c] + widths[c] - 1);
 			if (!trimStr.contains(ch)) {
 				break;
 			}
 			lastKept -= widths[c];
 		}

 		// Everything was trimmed away.
 		if (lastKept < 0) {
 			return EMPTY_UTF8;
 		}
 		return copyBinaryStringInOneSeg(0, lastKept);
 	}

 	/**
 	 * Multi-segment variant of {@link #trimRight(BinaryString)}.
 	 */
 	private BinaryString trimRightSlow(BinaryString trimStr) {
 		final int segSize = segments[0].size();
 		final SegmentAndOffset cursor = firstSegmentAndOffset(segSize);

 		// Index every character first: byte width and start position of each one.
 		final int[] widths = new int[numBytes];
 		final int[] starts = new int[numBytes];
 		int charCount = 0;
 		for (int pos = 0; pos < numBytes; charCount++) {
 			starts[charCount] = pos;
 			final int width = numBytesForFirstByte(cursor.value());
 			widths[charCount] = width;
 			pos += width;
 			cursor.skipBytes(width, segSize);
 		}

 		// lastKept is the index of the last byte that stays; walk the characters backwards.
 		int lastKept = numBytes - 1;
 		for (int c = charCount - 1; c >= 0; c--) {
 			final BinaryString ch = copyBinaryString(
 					starts[c],
 					starts[c] + widths[c] - 1);
 			if (!trimStr.contains(ch)) {
 				break;
 			}
 			lastKept -= widths[c];
 		}

 		// Everything was trimmed away.
 		if (lastKept < 0) {
 			return EMPTY_UTF8;
 		}
 		return copyBinaryString(0, lastKept);
 	}

 	/**
 	 * Trims the characters of {@code seek} from the selected ends of this string.
 	 *
 	 * @param leading whether to trim the left end
 	 * @param trailing whether to trim the right end
 	 * @param seek the trim string
 	 * @return null when {@code seek} is null; this instance when neither end is selected;
 	 *     otherwise the trimmed string
 	 */
 	public BinaryString trim(boolean leading, boolean trailing, BinaryString seek) {
 		ensureEncoded();
 		if (seek == null) {
 			return null;
 		}
 		if (leading) {
 			return trailing ? trim(seek) : trimLeft(seek);
 		}
 		return trailing ? trimRight(seek) : this;
 	}

 	/**
 	 * Treats this string as a list of key-value pairs and returns the value stored under
 	 * {@code keyName}.
 	 * Returns null when {@code keyName} is null or empty, or when no pair has that key.
 	 * example:
 	 * keyvalue('k1=v1;k2=v2', ';', '=', 'k2') = 'v2'
 	 * keyvalue('k1:v1,k2:v2', ',', ':', 'k3') = NULL
 	 *
 	 * @param split1  separator between key-value tuple.
 	 * @param split2  separator between key and value.
 	 * @param keyName name of the key whose value you want return.
 	 *
 	 * @return target value.
 	 */
 	public BinaryString keyValue(byte split1, byte split2, BinaryString keyName) {
 		ensureEncoded();
 		if (keyName == null || keyName.numBytes() == 0) {
 			return null;
 		}
 		if (!(inOneSeg() && keyName.inOneSeg())) {
 			return keyValueSlow(split1, split2, keyName);
 		}

 		// Byte position at which the pair currently being scanned starts.
 		int pairStart = 0;
 		for (int pos = 0; pos < numBytes; pos++) {
 			if (segments[0].get(offset + pos) != split1) {
 				continue;
 			}
 			// A pair ends here: return its value right away when its key is keyName.
 			final BinaryString found = findValueOfKey(split2, keyName, pairStart, pos);
 			if (found != null) {
 				return found;
 			}
 			pairStart = pos + 1;
 		}
 		// Last pair, which is not terminated by split1.
 		return findValueOfKey(split2, keyName, pairStart, numBytes);
 	}

 	/**
 	 * Looks at one key-value pair occupying bytes {@code [start, end)} of the first segment.
 	 * Only the first occurrence of {@code split} in that range is considered.
 	 *
 	 * @return the bytes after the separator when the bytes before it equal {@code keyName};
 	 *     null when there is no separator or the key differs
 	 */
 	private BinaryString findValueOfKey(
 			byte split,
 			BinaryString keyName,
 			int start,
 			int end) {
 		final int keyLen = keyName.numBytes;

 		// Locate the first separator inside the pair.
 		int sepIdx = start;
 		while (sepIdx < end && segments[0].get(offset + sepIdx) != split) {
 			sepIdx++;
 		}
 		if (sepIdx >= end) {
 			return null;
 		}

 		// The key must have exactly the length of keyName and the same bytes.
 		final boolean keyMatches = sepIdx == start + keyLen &&
 				segments[0].equalTo(keyName.segments[0], offset + start,
 									keyName.offset, keyLen);
 		if (!keyMatches) {
 			return null;
 		}

 		final int valueIdx = sepIdx + 1;
 		final int valueLen = end - valueIdx;
 		final byte[] value = new byte[valueLen];
 		segments[0].get(offset + valueIdx, value, 0, valueLen);
 		return fromBytes(value, 0, valueLen);
 	}

 	/**
 	 * Variant of {@link #keyValue(byte, byte, BinaryString)} that reads bytes through
 	 * {@code getByte} instead of accessing the first segment directly.
 	 */
 	private BinaryString keyValueSlow(
 			byte split1,
 			byte split2,
 			BinaryString keyName) {
 		// Byte position at which the pair currently being scanned starts.
 		int pairStart = 0;
 		for (int pos = 0; pos < numBytes; pos++) {
 			if (getByte(pos) != split1) {
 				continue;
 			}
 			// [pairStart, pos) is a complete pair: stop as soon as it holds the requested key.
 			BinaryString found = findValueOfKeySlow(split2, keyName, pairStart, pos);
 			if (found != null) {
 				return found;
 			}
 			pairStart = pos + 1;
 		}
 		// Whatever follows the last split1 (possibly the whole string) is the final pair.
 		return findValueOfKeySlow(split2, keyName, pairStart, numBytes);
 	}

 	/**
 	 * Variant of {@code findValueOfKey} that goes through {@code getByte} and
 	 * {@link BinaryRowUtil} rather than reading the first segment directly.
 	 *
 	 * <p>Only the first occurrence of {@code split} inside {@code [start, end)} is considered.
 	 *
 	 * @return a copy of the bytes following the separator, or null when the range holds no
 	 *         separator or the key differs.
 	 */
 	private BinaryString findValueOfKeySlow(
 			byte split,
 			BinaryString keyName,
 			int start,
 			int end) {
 		final int keyLen = keyName.numBytes;

 		// Locate the first key/value separator inside the pair.
 		int splitPos = start;
 		while (splitPos < end && getByte(splitPos) != split) {
 			splitPos++;
 		}
 		if (splitPos >= end) {
 			return null;
 		}

 		// The key matches only if it has exactly the expected length and identical bytes.
 		final boolean sameKey = splitPos == start + keyLen &&
 			BinaryRowUtil.equals(segments, offset + start, keyName.segments, keyName.offset, keyLen);
 		if (!sameKey) {
 			return null;
 		}

 		final int valueStart = splitPos + 1;
 		final byte[] value = BinaryRowUtil.copy(segments, offset + valueStart, end - valueStart);
 		return fromBytes(value);
 	}

 	/**
 	 * Returns the position of the first occurrence of {@code subStr} in this string, searching
 	 * from the given position. Positions are counted in characters, not bytes.
 	 *
 	 * <p>An empty {@code subStr} always yields 0, whatever {@code start} is.
 	 *
 	 * @param subStr string to search for
 	 * @param start  character position at which the search begins
 	 * @return the character position of the first occurrence, or -1 if there is none.
 	 */
 	public int indexOf(BinaryString subStr, int start) {
 		ensureEncoded();
 		subStr.ensureEncoded();
 		if (subStr.numBytes == 0) {
 			return 0;
 		}
 		if (!inOneSeg()) {
 			return indexOfSlow(subStr, start);
 		}

 		int bytePos = 0;
 		int charPos = 0;
 		// Walk character by character up to the requested start position.
 		for (; bytePos < numBytes && charPos < start; charPos++) {
 			bytePos += numBytesForFirstByte(getByteOneSeg(bytePos));
 		}

 		final int needleLen = subStr.numBytes;
 		for (;;) {
 			// Not enough bytes left to hold the needle.
 			if (bytePos + needleLen > numBytes) {
 				return -1;
 			}
 			if (BinaryRowUtil.equals(
 					segments, offset + bytePos, subStr.segments, subStr.offset, needleLen)) {
 				return charPos;
 			}
 			// No match here: move to the start of the next character.
 			bytePos += numBytesForFirstByte(getByteOneSeg(bytePos));
 			charPos++;
 			if (bytePos >= numBytes) {
 				return -1;
 			}
 		}
 	}

 	/**
 	 * Variant of {@link #indexOf(BinaryString, int)} that walks the string with a
 	 * {@link SegmentAndOffset} cursor instead of reading the first segment directly.
 	 */
 	private int indexOfSlow(BinaryString subStr, int start) {
 		final int segSize = segments[0].size();
 		final SegmentAndOffset cursor = firstSegmentAndOffset(segSize);
 		int bytePos = 0;
 		int charPos = 0;

 		// Walk character by character up to the requested start position.
 		while (bytePos < numBytes && charPos < start) {
 			final int width = numBytesForFirstByte(cursor.value());
 			bytePos += width;
 			charPos++;
 			cursor.skipBytes(width, segSize);
 		}

 		final int needleLen = subStr.numBytes;
 		for (;;) {
 			// Not enough bytes left to hold the needle.
 			if (bytePos + needleLen > numBytes) {
 				return -1;
 			}
 			if (BinaryRowUtil.equals(
 					segments, offset + bytePos, subStr.segments, subStr.offset, needleLen)) {
 				return charPos;
 			}
 			// No match here: move the cursor to the start of the next character.
 			final int width = numBytesForFirstByte(cursor.segment.get(cursor.offset));
 			bytePos += width;
 			charPos++;
 			cursor.skipBytes(width, segSize);
 			if (bytePos >= numBytes) {
 				return -1;
 			}
 		}
 	}

 	/**
 	 * Reverses the order of the characters of this string. The bytes of each individual
 	 * character keep their order.
 	 *
 	 * @return a new string whose characters are in the opposite order.
 	 */
 	public BinaryString reverse() {
 		ensureEncoded();
 		if (!inOneSeg()) {
 			return reverseSlow();
 		}

 		final byte[] reversed = new byte[this.numBytes];
 		// Next byte to read from this string.
 		int srcPos = 0;
 		// End (exclusive) of the still unfilled prefix of the output.
 		int dstEnd = reversed.length;
 		while (srcPos < numBytes) {
 			final int width = numBytesForFirstByte(getByteOneSeg(srcPos));
 			dstEnd -= width;
 			segments[0].get(offset + srcPos, reversed, dstEnd, width);
 			srcPos += width;
 		}
 		return BinaryString.fromBytes(reversed);
 	}

 	/**
 	 * Variant of {@link #reverse()} that walks the string with a {@link SegmentAndOffset}
 	 * cursor and copies through {@code BinaryRowUtil.copySlow}.
 	 */
 	private BinaryString reverseSlow() {
 		final int segSize = segments[0].size();
 		final SegmentAndOffset cursor = firstSegmentAndOffset(segSize);
 		final byte[] reversed = new byte[this.numBytes];
 		// Next byte to read from this string.
 		int srcPos = 0;
 		// End (exclusive) of the still unfilled prefix of the output.
 		int dstEnd = reversed.length;
 		while (srcPos < numBytes) {
 			final int width = numBytesForFirstByte(cursor.value());
 			dstEnd -= width;
 			BinaryRowUtil.copySlow(segments, offset + srcPos, reversed, dstEnd, width);
 			srcPos += width;
 			cursor.skipBytes(width, segSize);
 		}
 		return BinaryString.fromBytes(reversed);
 	}

 	// TODO repeat find rfind rpad lpad split
 	// TODO upper/lower is slow?..

 	/**
 	 * Creates a cursor on the first byte of this string, deriving the segment index and the
 	 * position inside that segment from {@code offset} and {@code segSize}.
 	 */
 	private SegmentAndOffset firstSegmentAndOffset(int segSize) {
 		final int offsetInSegment = offset % segSize;
 		return new SegmentAndOffset(offset / segSize, offsetInSegment);
 	}

 	/**
 	 * Creates a cursor on the last byte of this string, i.e. the byte at
 	 * {@code offset + numBytes - 1}.
 	 */
 	private SegmentAndOffset lastSegmentAndOffset(int segSize) {
 		final int lastByte = offset + numBytes - 1;
 		return new SegmentAndOffset(lastByte / segSize, lastByte % segSize);
 	}

 	/**
 	 * Creates a cursor on the first byte of this string. When the string lies in one segment
 	 * the cursor is built directly from segment 0 and {@code offset}; otherwise the position
 	 * is computed by {@link #firstSegmentAndOffset(int)}.
 	 */
 	private SegmentAndOffset startSegmentAndOffset(int segSize) {
 		return inOneSeg()
 			? new SegmentAndOffset(0, offset)
 			: firstSegmentAndOffset(segSize);
 	}

 	/**
 	 * Cursor over the segments of the enclosing string: the index of the current segment, the
 	 * segment itself and the position inside it.
 	 *
 	 * <p>When the cursor moves outside the segment array, {@code segment} becomes null.
 	 */
 	private class SegmentAndOffset {
 		int segIndex;
 		MemorySegment segment;
 		int offset;

 		private SegmentAndOffset(int segIndex, int offset) {
 			this.segIndex = segIndex;
 			this.offset = offset;
 			this.segment = segments[segIndex];
 		}

 		/** Re-reads {@code segment} for the current index; null when the index is out of range. */
 		private void assignSegment() {
 			final boolean inRange = segIndex >= 0 && segIndex < segments.length;
 			segment = inRange ? segments[segIndex] : null;
 		}

 		/** Steps one byte back, moving to the last byte of the previous segment if needed. */
 		private void previousByte(int segSize) {
 			offset--;
 			if (offset != -1) {
 				return;
 			}
 			segIndex--;
 			assignSegment();
 			offset = segSize - 1;
 		}

 		/** Steps one byte forward, moving to the start of the next segment if needed. */
 		private void nextByte(int segSize) {
 			offset++;
 			checkAdvance(segSize);
 		}

 		/** Moves to the next segment when the position has reached the end of this one. */
 		private void checkAdvance(int segSize) {
 			if (offset != segSize) {
 				return;
 			}
 			advance();
 		}

 		/** Moves to position 0 of the next segment. */
 		private void advance() {
 			segIndex++;
 			assignSegment();
 			offset = 0;
 		}

 		/** Moves {@code n} bytes forward, crossing as many segment boundaries as required. */
 		private void skipBytes(int n, int segSize) {
 			int available = segSize - offset;
 			if (n < available) {
 				// The target lies strictly inside the current segment.
 				offset += n;
 				return;
 			}
 			for (;;) {
 				final int step = Math.min(available, n);
 				n -= step;
 				if (n <= 0) {
 					offset += step;
 					checkAdvance(segSize);
 					return;
 				}
 				advance();
 				available = segSize - offset;
 			}
 		}

 		/** Returns the byte under the cursor. */
 		private byte value() {
 			return segment.get(offset);
 		}
 	}

 	/**
 	 * Parses this BinaryString to Long.
 	 *
 	 * <p>The value is accumulated as a negative number and negated at the end when the string
 	 * does not start with '-'. The negative range is one larger than the positive one
 	 * (Long.MAX_VALUE is '9223372036854775807', Long.MIN_VALUE is '-9223372036854775808'), so
 	 * every valid input fits while accumulating.
 	 *
 	 * <p>A fractional part is accepted and truncated, but it must consist of digits only.
 	 *
 	 * <p>This code is mostly copied from LazyLong.parseLong in Hive.
 	 * @return Long value if the parsing was successful else null.
 	 */
 	public Long toLong() {
 		ensureEncoded();
 		if (numBytes == 0) {
 			return null;
 		}
 		final int segSize = segments[0].size();
 		final SegmentAndOffset cursor = startSegmentAndOffset(segSize);
 		int consumed = 0;

 		// Optional sign; a sign alone is not a number.
 		final byte first = cursor.value();
 		final boolean negative = first == '-';
 		if (negative || first == '+') {
 			cursor.nextByte(segSize);
 			consumed++;
 			if (numBytes == 1) {
 				return null;
 			}
 		}

 		final int radix = 10;
 		final long stopValue = Long.MIN_VALUE / radix;
 		long accumulated = 0;
 		while (consumed < this.numBytes) {
 			final byte current = cursor.value();
 			consumed++;
 			cursor.nextByte(segSize);
 			if (current == '.') {
 				// Decimals are allowed and truncated; the fraction is validated below.
 				break;
 			}
 			if (current < '0' || current > '9') {
 				return null;
 			}
 			// Below stopValue, multiplying by the radix would drop under Long.MIN_VALUE.
 			if (accumulated < stopValue) {
 				return null;
 			}
 			accumulated = accumulated * radix - (current - '0');
 			// The accumulator is never positive unless the subtraction overflowed.
 			if (accumulated > 0) {
 				return null;
 			}
 		}

 		// Anything left is the fractional part: it does not change the result, but it must be
 		// well formed.
 		while (consumed < numBytes) {
 			final byte fraction = cursor.value();
 			if (fraction < '0' || fraction > '9') {
 				return null;
 			}
 			consumed++;
 			cursor.nextByte(segSize);
 		}

 		if (negative) {
 			return accumulated;
 		}
 		// Negating Long.MIN_VALUE stays negative, which reveals a positive overflow.
 		final long positive = -accumulated;
 		if (positive < 0) {
 			return null;
 		}
 		return positive;
 	}

 	/**
 	 * Parses this BinaryString to Int.
 	 *
 	 * <p>The value is accumulated as a negative number and negated at the end when the string
 	 * does not start with '-'. The negative range is one larger than the positive one
 	 * (Integer.MAX_VALUE is '2147483647', Integer.MIN_VALUE is '-2147483648'), so every valid
 	 * input fits while accumulating.
 	 *
 	 * <p>A fractional part is accepted and truncated, but it must consist of digits only.
 	 *
 	 * <p>This code is mostly copied from LazyInt.parseInt in Hive.
 	 *
 	 * <p>Note that, this method is almost same as `toLong`, but we leave it duplicated for performance
 	 * reasons, like Hive does.
 	 * @return Integer value if the parsing was successful else null.
 	 */
 	public Integer toInt() {
 		ensureEncoded();
 		if (numBytes == 0) {
 			return null;
 		}
 		final int segSize = segments[0].size();
 		final SegmentAndOffset cursor = startSegmentAndOffset(segSize);
 		int consumed = 0;

 		// Optional sign; a sign alone is not a number.
 		final byte first = cursor.value();
 		final boolean negative = first == '-';
 		if (negative || first == '+') {
 			cursor.nextByte(segSize);
 			consumed++;
 			if (numBytes == 1) {
 				return null;
 			}
 		}

 		final int radix = 10;
 		final int stopValue = Integer.MIN_VALUE / radix;
 		int accumulated = 0;
 		while (consumed < this.numBytes) {
 			final byte current = cursor.value();
 			consumed++;
 			cursor.nextByte(segSize);
 			if (current == '.') {
 				// Decimals are allowed and truncated; the fraction is validated below.
 				break;
 			}
 			if (current < '0' || current > '9') {
 				return null;
 			}
 			// Below stopValue, multiplying by the radix would drop under Integer.MIN_VALUE.
 			if (accumulated < stopValue) {
 				return null;
 			}
 			accumulated = accumulated * radix - (current - '0');
 			// The accumulator is never positive unless the subtraction overflowed.
 			if (accumulated > 0) {
 				return null;
 			}
 		}

 		// Anything left is the fractional part: it does not change the result, but it must be
 		// well formed.
 		while (consumed < numBytes) {
 			final byte fraction = cursor.value();
 			if (fraction < '0' || fraction > '9') {
 				return null;
 			}
 			consumed++;
 			cursor.nextByte(segSize);
 		}

 		if (negative) {
 			return accumulated;
 		}
 		// Negating Integer.MIN_VALUE stays negative, which reveals a positive overflow.
 		final int positive = -accumulated;
 		if (positive < 0) {
 			return null;
 		}
 		return positive;
 	}

 	/**
 	 * Parses this BinaryString to Short by way of {@link #toInt()}.
 	 *
 	 * @return Short value, or null if the string is not an integer or does not fit in a short.
 	 */
 	public Short toShort() {
 		final Integer parsed = toInt();
 		if (parsed == null) {
 			return null;
 		}
 		final short narrowed = parsed.shortValue();
 		// The narrowing lost information if the value no longer equals the parsed int.
 		if (narrowed != parsed) {
 			return null;
 		}
 		return narrowed;
 	}

 	/**
 	 * Parses this BinaryString to Byte by way of {@link #toInt()}.
 	 *
 	 * @return Byte value, or null if the string is not an integer or does not fit in a byte.
 	 */
 	public Byte toByte() {
 		final Integer parsed = toInt();
 		if (parsed == null) {
 			return null;
 		}
 		final byte narrowed = parsed.byteValue();
 		// The narrowing lost information if the value no longer equals the parsed int.
 		if (narrowed != parsed) {
 			return null;
 		}
 		return narrowed;
 	}

 	/**
 	 * Parses the Java string form of this BinaryString to Double.
 	 *
 	 * @return Double value, or null if the text is not a valid double.
 	 */
 	public Double toDouble() {
 		try {
 			final double parsed = Double.parseDouble(toString());
 			return parsed;
 		} catch (NumberFormatException e) {
 			return null;
 		}
 	}

 	/**
 	 * Parses the Java string form of this BinaryString to Float.
 	 *
 	 * @return Float value, or null if the text is not a valid float.
 	 */
 	public Float toFloat() {
 		try {
 			final float parsed = Float.parseFloat(toString());
 			return parsed;
 		} catch (NumberFormatException e) {
 			return null;
 		}
 	}

 	/**
 	 * Parses this BinaryString to Decimal.
 	 *
 	 * <p>When both {@code precision} and the byte length of this string are at most
 	 * {@code Decimal.MAX_LONG_DIGITS}, the number is parsed directly from the memory segments.
 	 * The accepted form is: optional leading white space (' ', '\n', '\t'), an optional sign,
 	 * at least one digit with at most one decimal point, an optional exponent ('e' or 'E', an
 	 * optional sign and at least one digit) and optional trailing white space. The parsed value
 	 * is rounded half away from zero to {@code scale}. Every other case is delegated to
 	 * {@link #toDecimalSlow(int, int)}.
 	 *
 	 * @param precision precision of the resulting Decimal
 	 * @param scale     scale of the resulting Decimal
 	 * @return Decimal value if the parsing was successful, or null if the text is not a
 	 *         well-formed number or the value overflows the requested precision.
 	 */
 	public Decimal toDecimal(int precision, int scale) {
 		ensureEncoded();
 		if (precision > Decimal.MAX_LONG_DIGITS || this.numBytes > Decimal.MAX_LONG_DIGITS) {
 			return toDecimalSlow(precision, scale);
 		}

 		// Data in Decimal is stored by one long value if `precision` <= Decimal.MAX_LONG_DIGITS.
 		// In this case we can directly extract the value from memory segment.
 		int size = getSegments()[0].size();
 		SegmentAndOffset segmentAndOffset = startSegmentAndOffset(size);
 		int totalOffset = 0;

 		// Remove white spaces at the beginning
 		byte b = 0;
 		while (totalOffset < this.numBytes) {
 			b = segmentAndOffset.value();
 			if (b != ' ' && b != '\n' && b != '\t') {
 				break;
 			}
 			totalOffset++;
 			segmentAndOffset.nextByte(size);
 		}
 		if (totalOffset == this.numBytes) {
 			// all whitespaces
 			return null;
 		}

 		// ======= Significand part begin =======
 		final boolean negative = b == '-';
 		if (negative || b == '+') {
 			segmentAndOffset.nextByte(size);
 			totalOffset++;
 			if (totalOffset == this.numBytes) {
 				// only contains prefix plus/minus
 				return null;
 			}
 		}

 		long significand = 0;
 		int exp = 0;
 		int significandLen = 0;
 		int pointPos = -1;
 		// True when the most recent scan loop stopped on a byte it could not interpret. That
 		// byte is already consumed and is kept in `b`, so it has to be validated explicitly.
 		boolean stoppedOnByte = false;

 		while (totalOffset < this.numBytes) {
 			b = segmentAndOffset.value();
 			totalOffset++;
 			segmentAndOffset.nextByte(size);

 			if (b >= '0' && b <= '9') {
 				// No need to worry about overflow, because this.numBytes <= Decimal.MAX_LONG_DIGITS
 				significand = significand * 10 + (b - '0');
 				significandLen++;
 			} else if (b == '.') {
 				if (pointPos >= 0) {
 					// More than one decimal point
 					return null;
 				}
 				pointPos = significandLen;
 			} else {
 				stoppedOnByte = true;
 				break;
 			}
 		}

 		if (significandLen == 0) {
 			// Not a single digit, e.g. ".", "+." or "e5"
 			return null;
 		}
 		if (pointPos < 0) {
 			pointPos = significandLen;
 		}
 		if (negative) {
 			significand = -significand;
 		}
 		// ======= Significand part end =======

 		// ======= Exponential part begin =======
 		if (stoppedOnByte && (b == 'e' || b == 'E')) {
 			if (totalOffset == this.numBytes) {
 				// Exponent marker without an exponent, e.g. "1e"
 				return null;
 			}
 			b = segmentAndOffset.value();
 			final boolean expNegative = b == '-';
 			if (expNegative || b == '+') {
 				segmentAndOffset.nextByte(size);
 				totalOffset++;
 				if (totalOffset == this.numBytes) {
 					return null;
 				}
 			}

 			int expDigits = 0;
 			boolean sawExpDigit = false;
 			// As `precision` <= 18, value absolute range is limited to 10^-18 ~ 10^18.
 			// The worst case is <18-digits>E-36
 			final int expStopValue = 40;
 			stoppedOnByte = false;

 			while (totalOffset < this.numBytes) {
 				b = segmentAndOffset.value();
 				totalOffset++;
 				segmentAndOffset.nextByte(size);

 				if (b >= '0' && b <= '9') {
 					sawExpDigit = true;
 					// No need to worry about larger exponents,
 					// because they will produce overflow or underflow
 					if (expDigits < expStopValue) {
 						expDigits = expDigits * 10 + (b - '0');
 					}
 				} else {
 					stoppedOnByte = true;
 					break;
 				}
 			}

 			if (!sawExpDigit) {
 				// Exponent without digits, e.g. "1ex" or "1e "
 				return null;
 			}
 			if (expNegative) {
 				expDigits = -expDigits;
 			}
 			exp += expDigits;
 		}
 		exp -= significandLen - pointPos;
 		// ======= Exponential part end =======

 		// The byte that ended the last scan was consumed there, so the loop below never sees
 		// it: only a white space may directly follow the number.
 		if (stoppedOnByte && b != ' ' && b != '\n' && b != '\t') {
 			return null;
 		}

 		// Check for invalid character at the end
 		while (totalOffset < this.numBytes) {
 			b = segmentAndOffset.value();
 			totalOffset++;
 			segmentAndOffset.nextByte(size);
 			// White spaces are allowed at the end
 			if (b != ' ' && b != '\n' && b != '\t') {
 				return null;
 			}
 		}

 		// Round exp to scale
 		int change = exp + scale;
 		if (significandLen + change > precision) {
 			// Overflow
 			return null;
 		}
 		if (change >= 0) {
 			significand *= Decimal.POW10[change];
 		} else if (-change > significandLen) {
 			// |significand| < 10^significandLen, so the quotient is below 0.1 and rounds to zero.
 			// Handling it here also keeps the POW10 index below within
 			// significandLen <= Decimal.MAX_LONG_DIGITS for large negative exponents.
 			significand = 0;
 		} else {
 			int k = negative ? -5 : 5;
 			significand = (significand + k * Decimal.POW10[-change - 1]) / Decimal.POW10[-change];
 		}
 		return Decimal.fromLong(significand, precision, scale);
 	}

 	/**
 	 * Parses this string to Decimal by decoding it to chars and handing them to
 	 * {@link BigDecimal}. Leading and trailing ' ', '\n' and '\t' are ignored.
 	 *
 	 * @return the result of {@code Decimal.fromBigDecimal}, or null if the bytes cannot be
 	 *         decoded or the text is rejected by BigDecimal.
 	 */
 	private Decimal toDecimalSlow(int precision, int scale) {
 		// As data in Decimal is currently stored by BigDecimal if `precision` > Decimal.MAX_LONG_DIGITS,
 		// and BigDecimal only supports String or char[] for its constructor,
 		// we can't directly extract the value from BinaryString.
 		//
 		// As BigDecimal(char[], int, int) is faster than BigDecimal(String, int, int),
 		// we extract char[] from the memory segment and pass it to the constructor of BigDecimal.
 		final char[] decoded = StringUtf8Utils.allocateChars(numBytes);
 		final int len;
 		if (segments.length == 1) {
 			len = StringUtf8Utils.decodeUTF8Strict(segments[0], offset, numBytes, decoded);
 		} else {
 			final byte[] raw = StringUtf8Utils.allocateBytes(numBytes);
 			copyTo(raw);
 			len = StringUtf8Utils.decodeUTF8Strict(raw, 0, numBytes, decoded);
 		}
 		// A negative length is treated as a decoding failure.
 		if (len < 0) {
 			return null;
 		}

 		// Index of the first non white space char (stays 0 if there is none).
 		int first = 0;
 		for (int i = 0; i < len; i++) {
 			final char c = decoded[i];
 			if (c != ' ' && c != '\n' && c != '\t') {
 				first = i;
 				break;
 			}
 		}
 		// Index just after the last non white space char (stays len if there is none).
 		int limit = len;
 		for (int i = len - 1; i >= 0; i--) {
 			final char c = decoded[i];
 			if (c != ' ' && c != '\n' && c != '\t') {
 				limit = i + 1;
 				break;
 			}
 		}

 		try {
 			final BigDecimal parsed = new BigDecimal(decoded, first, limit - first);
 			return Decimal.fromBigDecimal(parsed, precision, scale);
 		} catch (NumberFormatException nfe) {
 			return null;
 		}
 	}

 	/**
 	 * Returns the upper case of this string.
 	 *
 	 * <p>The bytes are converted one by one as long as each of them is a single-byte character
 	 * whose upper case fits in 7 bits. Otherwise, or when a Java string is already attached,
 	 * the conversion is delegated to {@link #toUpperCaseSlow()}.
 	 */
 	public BinaryString toUpperCase() {
 		if (javaString != null) {
 			return toUpperCaseSlow();
 		}
 		if (numBytes == 0) {
 			return EMPTY_UTF8;
 		}
 		int size = segments[0].size();
 		SegmentAndOffset segmentAndOffset = startSegmentAndOffset(size);
 		byte[] bytes = new byte[numBytes];
 		// Every position, including the first, is filled by the loop below.
 		for (int i = 0; i < numBytes; i++) {
 			byte b = segmentAndOffset.value();
 			if (numBytesForFirstByte(b) != 1) {
 				// fallback
 				return toUpperCaseSlow();
 			}
 			int upper = Character.toUpperCase((int) b);
 			if (upper > 127) {
 				// fallback
 				return toUpperCaseSlow();
 			}
 			bytes[i] = (byte) upper;
 			segmentAndOffset.nextByte(size);
 		}
 		return fromBytes(bytes);
 	}

 	/**
 	 * Upper-cases this string through its Java string form.
 	 *
 	 * <p>{@code Locale.ROOT} is used so that the result does not depend on the default locale
 	 * of the JVM, in line with the byte-wise path of {@link #toUpperCase()}, which relies on
 	 * the locale-independent {@code Character.toUpperCase}.
 	 */
 	private BinaryString toUpperCaseSlow() {
 		return fromString(toString().toUpperCase(java.util.Locale.ROOT));
 	}

 	/**
 	 * Returns the lower case of this string.
 	 *
 	 * <p>The bytes are converted one by one as long as each of them is a single-byte character
 	 * whose lower case fits in 7 bits. Otherwise, or when a Java string is already attached,
 	 * the conversion is delegated to {@link #toLowerCaseSlow()}.
 	 */
 	public BinaryString toLowerCase() {
 		if (javaString != null) {
 			return toLowerCaseSlow();
 		}
 		if (numBytes == 0) {
 			return EMPTY_UTF8;
 		}
 		int size = segments[0].size();
 		SegmentAndOffset segmentAndOffset = startSegmentAndOffset(size);
 		byte[] bytes = new byte[numBytes];
 		// Every position, including the first, is filled by the loop below.
 		for (int i = 0; i < numBytes; i++) {
 			byte b = segmentAndOffset.value();
 			if (numBytesForFirstByte(b) != 1) {
 				// fallback
 				return toLowerCaseSlow();
 			}
 			int lower = Character.toLowerCase((int) b);
 			if (lower > 127) {
 				// fallback
 				return toLowerCaseSlow();
 			}
 			bytes[i] = (byte) lower;
 			segmentAndOffset.nextByte(size);
 		}
 		return fromBytes(bytes);
 	}

 	/**
 	 * Lower-cases this string through its Java string form.
 	 *
 	 * <p>{@code Locale.ROOT} is used so that the result does not depend on the default locale
 	 * of the JVM, in line with the byte-wise path of {@link #toLowerCase()}, which relies on
 	 * the locale-independent {@code Character.toLowerCase}.
 	 */
 	private BinaryString toLowerCaseSlow() {
 		return fromString(toString().toLowerCase(java.util.Locale.ROOT));
 	}

 	/**
 	 * <p>Splits this string into an array of tokens, using the whole {@code separator} string
 	 * as delimiter.</p>
 	 *
 	 * <p>The separator is not included in the returned tokens.
 	 * Adjacent separators are treated as separators for empty tokens.</p>
 	 *
 	 * <p>A {@code null} or empty separator splits on a single space.</p>
 	 *
 	 * <pre>
 	 * "".splitByWholeSeparatorPreserveAllTokens(*)                 = []
 	 * "ab de fg".splitByWholeSeparatorPreserveAllTokens(null)      = ["ab", "de", "fg"]
 	 * "ab   de fg".splitByWholeSeparatorPreserveAllTokens(null)    = ["ab", "", "", "de", "fg"]
 	 * "ab:cd:ef".splitByWholeSeparatorPreserveAllTokens(":")       = ["ab", "cd", "ef"]
 	 * "ab-!-cd-!-ef".splitByWholeSeparatorPreserveAllTokens("-!-") = ["ab", "cd", "ef"]
 	 * </pre>
 	 *
 	 * <p>Note: non-empty tokens are created over the MemorySegments of this string.</p>
 	 *
 	 * @param separator  string to be used as a delimiter,
 	 *  {@code null} or empty splits on a single space
 	 * @return an array with the tokens, empty if this string is empty
 	 */
 	public BinaryString[] splitByWholeSeparatorPreserveAllTokens(BinaryString separator) {
 		ensureEncoded();
 		final int len = numBytes;
 		if (len == 0) {
 			return EMPTY_STRING_ARRAY;
 		}
 		if (separator == null || EMPTY_UTF8.equals(separator)) {
 			// Fall back to a single space as separator.
 			return splitByWholeSeparatorPreserveAllTokens(fromString(" "));
 		}
 		separator.ensureEncoded();

 		final int separatorLength = separator.numBytes;
 		final List<BinaryString> tokens = new ArrayList<>();
 		// Byte position (relative to this string) where the current token starts.
 		int tokenStart = 0;
 		// Byte position of the most recent separator hit.
 		int hit = 0;
 		while (hit < len) {
 			hit = BinaryRowUtil.find(
 					segments, offset + tokenStart, numBytes - tokenStart,
 					separator.segments, separator.offset, separator.numBytes) - offset;

 			if (hit <= -1) {
 				// No further separator: the remainder of the string is the last token.
 				tokens.add(BinaryString.fromAddress(segments, offset + tokenStart, numBytes - tokenStart));
 				break;
 			}

 			// A hit right at the token start is a consecutive separator, i.e. an empty token.
 			tokens.add(hit > tokenStart
 				? BinaryString.fromAddress(segments, offset + tokenStart, hit - tokenStart)
 				: EMPTY_UTF8);
 			// The next token starts right after the separator.
 			tokenStart = hit + separatorLength;
 		}

 		return tokens.toArray(new BinaryString[tokens.size()]);
 	}

 	/**
 	 * Feeds the bytes of this string to the given {@link MessageDigest} and returns the digest
 	 * as a hex string.
 	 *
 	 * @param md digest used to hash the bytes of this string
 	 */
 	public BinaryString hash(MessageDigest md) {
 		final byte[] digest = md.digest(getBytes());
 		final String hex = Hex.encodeHexString(digest);
 		return fromString(hex);
 	}

 	/**
 	 * Hashes this string with the {@link MessageDigest} registered under the given name.
 	 *
 	 * @param algorithm name passed to {@code MessageDigest.getInstance}
 	 * @throws NoSuchAlgorithmException if no digest is available for {@code algorithm}
 	 */
 	public BinaryString hash(String algorithm) throws NoSuchAlgorithmException {
 		final MessageDigest digest = MessageDigest.getInstance(algorithm);
 		return hash(digest);
 	}

 	/** Lower-case spellings that {@link #toBooleanSQL()} maps to {@code true}. */
 	private static final List<BinaryString> TRUE_STRINGS =
 			Stream
 					.of("t", "true", "y", "yes", "1")
 					.map(literal -> {
 						BinaryString encoded = fromString(literal);
 						encoded.ensureEncoded();
 						return encoded;
 					})
 					.collect(Collectors.toList());

 	/** Lower-case spellings that {@link #toBooleanSQL()} maps to {@code false}. */
 	private static final List<BinaryString> FALSE_STRINGS =
 			Stream
 					.of("f", "false", "n", "no", "0")
 					.map(literal -> {
 						BinaryString encoded = fromString(literal);
 						encoded.ensureEncoded();
 						return encoded;
 					})
 					.collect(Collectors.toList());

 	/**
 	 * Decide boolean representation of a string.
 	 *
 	 * <p>The lower-cased string is looked up in {@code TRUE_STRINGS} and then in
 	 * {@code FALSE_STRINGS}.
 	 *
 	 * @return true or false if the lower-cased string is one of the known spellings,
 	 *         null otherwise.
 	 */
 	public Boolean toBooleanSQL() {
 		// Lower-case once and reuse the result for both lookups.
 		final BinaryString lowered = toLowerCase();
 		if (TRUE_STRINGS.contains(lowered)) {
 			return true;
 		}
 		if (FALSE_STRINGS.contains(lowered)) {
 			return false;
 		}
 		return null;
 	}
 }