blob: 382bc17a978064a102ccfeb643b3f21c8a2b31d4 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.types;
import org.apache.flink.annotation.Public;
import org.apache.flink.core.memory.DataInputView;
import org.apache.flink.core.memory.DataOutputView;
import org.apache.flink.core.memory.MemorySegment;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.nio.CharBuffer;
import static org.apache.flink.util.Preconditions.checkNotNull;
/**
 * Mutable string data type that implements the {@link NormalizableKey} interface.
 * StringValue encapsulates the basic functionality of a {@link String} in a serializable and mutable way.
 *
 * <p>The mutability allows the object to be reused inside user code, also across invocations. Reusing a
 * StringValue object helps to increase performance, because string objects are rather heavy-weight and incur
 * a lot of garbage collection overhead if created and destroyed en masse.
 *
 * @see org.apache.flink.types.NormalizableKey
 * @see java.lang.String
 * @see java.lang.CharSequence
 */
@Public
public class StringValue implements NormalizableKey<StringValue>, CharSequence, ResettableValue<StringValue>,
CopyableValue<StringValue>, Appendable
{
/** Serialization version identifier. */
private static final long serialVersionUID = 1L;
/** Shared zero-length array that the constructors install as the initial backing array. */
private static final char[] EMPTY_STRING = new char[0];
/**
 * 0x80. Values at or above this do not fit into seven bits; the serialization code sets this bit
 * on every byte that is followed by another byte of the same variable-length number.
 */
private static final int HIGH_BIT = 0x1 << 7;
/** 0x2000. Characters below this (and at or above HIGH_BIT) are emitted as two bytes by copyNormalizedKey. */
private static final int HIGH_BIT2 = 0x1 << 13;
/** 0xC0. OR-ed into the first byte of the three-byte form written by copyNormalizedKey. */
private static final int HIGH_BIT2_MASK = 0x3 << 6;
private char[] value; // character value of the string value, not necessarily completely filled
private int len; // length of the string value (number of valid chars at the start of 'value')
private int hashCode; // cache for the hashCode; 0 means "not computed yet"
// --------------------------------------------------------------------------------------------
// Constructors
// --------------------------------------------------------------------------------------------
/**
 * Creates an empty StringValue. The instance starts out on the shared zero-length backing array.
 */
public StringValue() {
    value = EMPTY_STRING;
}
/**
 * Creates a StringValue that holds a copy of the characters of the given sequence.
 *
 * @param value The character sequence whose contents are copied into this StringValue.
 */
public StringValue(CharSequence value) {
    // start from the shared empty array; setValue allocates a properly sized one if needed
    this.value = EMPTY_STRING;
    this.setValue(value);
}
/**
 * Creates a StringValue as a copy of the given StringValue.
 *
 * @param value The StringValue whose contents are copied.
 */
public StringValue(StringValue value) {
    // start from the shared empty array; setValue allocates a properly sized one if needed
    this.value = EMPTY_STRING;
    this.setValue(value);
}
/**
 * Creates a StringValue from a sub-range of another StringValue.
 *
 * @param value The StringValue that contains the sub-range.
 * @param offset The index of the first character to copy.
 * @param len The number of characters to copy.
 */
public StringValue(StringValue value, int offset, int len) {
    // start from the shared empty array; setValue allocates a properly sized one if needed
    this.value = EMPTY_STRING;
    this.setValue(value, offset, len);
}
// --------------------------------------------------------------------------------------------
// Getters and Setters
// --------------------------------------------------------------------------------------------
/**
 * Truncates the string to the given length. The backing array is kept as it is; only the
 * number of valid characters changes.
 *
 * @param len The new length. Must be between 0 and the current length (both inclusive).
 * @throws IllegalArgumentException Thrown, if the length is negative or larger than the current length.
 */
public void setLength(int len) {
    if (len < 0 || len > this.len) {
        throw new IllegalArgumentException("Length must be between 0 and the current length.");
    }
    this.len = len;
    // the cached hash was computed over the previous contents and must be recomputed on demand
    this.hashCode = 0;
}
/**
 * Exposes the internal backing array without copying it. Only the first {@link #length()}
 * characters belong to the current string; the array itself may be longer.
 *
 * @return The internal character array.
 */
public char[] getCharArray() {
    final char[] backing = this.value;
    return backing;
}
/**
 * Returns the current contents as a {@link String}. Delegates to {@link #toString()}.
 *
 * @return A String with the same characters as this StringValue.
 */
public String getValue() {
    final String contents = toString();
    return contents;
}
/**
 * Replaces the contents of this StringValue with all characters of the given sequence.
 *
 * @param value The character sequence to copy from.
 */
public void setValue(CharSequence value) {
    checkNotNull(value);
    final int count = value.length();
    setValue(value, 0, count);
}
/**
 * Replaces the contents of this StringValue with a copy of the given StringValue.
 *
 * @param value The StringValue to copy from.
 */
@Override
public void setValue(StringValue value) {
    checkNotNull(value);
    final char[] sourceChars = value.value;
    final int sourceLen = value.len;
    setValue(sourceChars, 0, sourceLen);
}
/**
 * Sets the value of the StringValue to a substring of the given StringValue.
 *
 * @param value The StringValue to copy from.
 * @param offset The position to start the substring.
 * @param len The length of the substring.
 * @throws IndexOutOfBoundsException Thrown, if the offset or length is negative, or if the
 *         requested range reaches beyond the length of the given StringValue.
 */
public void setValue(StringValue value, int offset, int len) {
    checkNotNull(value);
    // Validate against the logical length. The backing array may be longer than the string it
    // holds, so checking against the array alone would let stale characters leak out.
    if (offset < 0 || len < 0 || offset > value.len - len) {
        throw new IndexOutOfBoundsException(
                "offset: " + offset + " len: " + len + " value.len: " + value.len);
    }
    setValue(value.value, offset, len);
}
/**
 * Sets the value of the StringValue to a substring of the given character sequence.
 *
 * @param value The character sequence to copy from.
 * @param offset The position to start the substring.
 * @param len The length of the substring.
 * @throws IndexOutOfBoundsException Thrown, if the offset or length is negative, or if the
 *         requested range reaches beyond the end of the sequence.
 */
public void setValue(CharSequence value, int offset, int len) {
    checkNotNull(value);
    if (offset < 0 || len < 0 || offset > value.length() - len) {
        throw new IndexOutOfBoundsException(
                "offset: " + offset + " len: " + len + " value.len: " + value.length());
    }
    ensureSize(len);
    // The length is updated only after the copy. If 'value' is this very object, charAt() must
    // still see the old length while characters are shifted towards the front.
    for (int i = 0; i < len; i++) {
        this.value[i] = value.charAt(offset + i);
    }
    this.len = len;
    this.hashCode = 0;
}
/**
 * Replaces the contents of this StringValue with the remaining characters of the given buffer,
 * i.e. the characters between the buffer's current position (inclusive) and its limit (exclusive).
 * The characters are consumed from the buffer via a relative bulk get.
 *
 * @param buffer The character buffer to read the characters from.
 */
public void setValue(CharBuffer buffer) {
    checkNotNull(buffer);
    // for a CharBuffer, remaining() is the same number that length() reports
    final int count = buffer.remaining();
    ensureSize(count);
    buffer.get(this.value, 0, count);
    this.hashCode = 0;
    this.len = count;
}
/**
 * Replaces the contents of this StringValue with a range of the given character array.
 *
 * @param chars The array to copy from.
 * @param offset The index of the first character to copy.
 * @param len The number of characters to copy.
 * @throws IndexOutOfBoundsException Thrown, if the offset or length is negative, or if the
 *         range reaches beyond the end of the array.
 */
public void setValue(char[] chars, int offset, int len) {
    checkNotNull(chars);
    final boolean rangeValid = offset >= 0 && len >= 0 && offset <= chars.length - len;
    if (!rangeValid) {
        throw new IndexOutOfBoundsException();
    }
    ensureSize(len);
    System.arraycopy(chars, offset, this.value, 0, len);
    this.hashCode = 0;
    this.len = len;
}
/**
 * Replaces the contents of this StringValue with single-byte characters taken from a byte array.
 * Each byte is interpreted as an unsigned value and becomes exactly one character, so the n-th
 * character corresponds to the n-th byte after the offset.
 *
 * @param bytes The binary character data.
 * @param offset The index of the first byte to read.
 * @param len The number of bytes to read.
 * @throws NullPointerException Thrown, if the byte array is null.
 * @throws IndexOutOfBoundsException Thrown, if the offset or length is negative, or if the
 *         range reaches beyond the end of the array.
 */
public void setValueAscii(byte[] bytes, int offset, int len) {
    if (bytes == null) {
        throw new NullPointerException("Bytes must not be null");
    }
    if (len < 0 || offset < 0 || offset > bytes.length - len) {
        throw new IndexOutOfBoundsException();
    }
    ensureSize(len);
    this.len = len;
    this.hashCode = 0;
    final char[] target = this.value;
    for (int i = 0; i < len; i++) {
        // mask to avoid sign extension of bytes >= 0x80
        target[i] = (char) (bytes[offset + i] & 0xff);
    }
}
// --------------------------------------------------------------------------------------------
// String Methods
// --------------------------------------------------------------------------------------------
/**
 * Creates a new StringValue holding the characters from the given start index up to the end
 * of this string.
 *
 * @param start The beginning index, inclusive.
 * @return The substring as a new StringValue.
 * @exception IndexOutOfBoundsException Thrown, if the start is negative.
 */
public StringValue substring(int start) {
    final int end = this.len;
    return substring(start, end);
}
/**
 * Creates a new StringValue holding the characters from index <code>start</code> up to index
 * <code>end - 1</code> of this string.
 *
 * @param start The beginning index, inclusive.
 * @param end The ending index, exclusive.
 * @return The substring as a new StringValue.
 * @exception IndexOutOfBoundsException
 *            Thrown, if the start is negative or the end lies before the start.
 */
public StringValue substring(int start, int end) {
    final int count = end - start;
    return new StringValue(this, start, count);
}
/**
 * Copies the characters from the given start index up to the end of this string into the
 * given target StringValue.
 *
 * @param target The StringValue object that receives the substring.
 * @param start The beginning index, inclusive.
 * @exception IndexOutOfBoundsException Thrown, if the start is negative.
 */
public void substring(StringValue target, int start) {
    final int end = this.len;
    substring(target, start, end);
}
/**
 * Copies the characters from index <code>start</code> up to index <code>end - 1</code> of this
 * string into the given target StringValue.
 *
 * @param target The StringValue object that receives the substring.
 * @param start The beginning index, inclusive.
 * @param end The ending index, exclusive.
 * @exception IndexOutOfBoundsException
 *            Thrown, if the start is negative or the end lies before the start.
 */
public void substring(StringValue target, int start, int end) {
    final int count = end - start;
    target.setValue(this, start, count);
}
/**
 * Searches this StringValue for the given character sequence, starting at the beginning.
 *
 * @param str The character sequence to search for. Must not be empty.
 * @return The position of the first occurrence of the search string in the string value, or
 *         <code>-1</code>, if the character sequence was not found.
 */
public int find(CharSequence str) {
    final int firstPosition = 0;
    return find(str, firstPosition);
}
/**
 * Searches this StringValue for the given character sequence, beginning the search at
 * position <code>start</code>.
 *
 * @param str The character sequence to search for. Must not be empty.
 * @param start The position at which the search begins.
 * @return The position of the first occurrence at or after <code>start</code>, or
 *         <code>-1</code>, if the character sequence was not found.
 * @throws IllegalArgumentException Thrown, if the search string is empty.
 */
public int find(CharSequence str, int start) {
    final int textLen = this.len;
    final int patternLen = str.length();
    if (patternLen == 0) {
        throw new IllegalArgumentException("Cannot find empty string.");
    }
    final char first = str.charAt(0);
    for (int candidate = start; candidate < textLen; candidate++) {
        if (this.value[candidate] != first) {
            continue;
        }
        // first character matches; extend the match as far as both sequences allow
        int matched = 1;
        while (matched < patternLen
                && candidate + matched < textLen
                && str.charAt(matched) == this.value[candidate + matched]) {
            matched++;
        }
        if (matched == patternLen) {
            return candidate;
        }
    }
    return -1;
}
/**
 * Tests whether the characters of this StringValue, beginning at the given index, match the
 * given prefix.
 *
 * @param prefix The prefix character sequence.
 * @param startIndex The position in this StringValue at which the comparison begins.
 *
 * @return True, if the prefix fits into this StringValue at <code>startIndex</code> and all of
 *         its characters match; false otherwise (including a negative start index).
 */
public boolean startsWith(CharSequence prefix, int startIndex) {
    final char[] chars = this.value;
    final int prefixLen = prefix.length();
    if (startIndex < 0 || startIndex > this.len - prefixLen) {
        return false;
    }
    for (int i = 0; i < prefixLen; i++) {
        if (chars[startIndex + i] != prefix.charAt(i)) {
            return false;
        }
    }
    return true;
}
/**
 * Tests whether this StringValue begins with the given prefix.
 *
 * @param prefix The prefix character sequence.
 *
 * @return True, if this StringValue has <code>prefix</code> as its prefix.
 */
public boolean startsWith(CharSequence prefix) {
    final int fromBeginning = 0;
    return startsWith(prefix, fromBeginning);
}
// --------------------------------------------------------------------------------------------
// Appendable Methods
// --------------------------------------------------------------------------------------------
/**
 * Appends a single character to this StringValue, enlarging the backing array if needed.
 *
 * @param c The character to append.
 * @return This StringValue.
 * @see java.lang.Appendable#append(char)
 */
@Override
public Appendable append(char c) {
    grow(this.len + 1);
    this.value[this.len++] = c;
    // contents changed, so a previously cached hash is no longer valid
    this.hashCode = 0;
    return this;
}
/**
 * Appends all characters of the given sequence to this StringValue.
 *
 * @param csq The character sequence to append. As specified by {@link Appendable}, a null
 *            argument appends the four characters <code>"null"</code>.
 * @return This StringValue.
 * @see java.lang.Appendable#append(java.lang.CharSequence)
 */
@Override
public Appendable append(CharSequence csq) {
    // Appendable contract: null is appended as the literal text "null"
    final CharSequence source = (csq == null) ? "null" : csq;
    append(source, 0, source.length());
    return this;
}
/**
 * Appends the characters <code>start</code> (inclusive) to <code>end</code> (exclusive) of the
 * given sequence to this StringValue.
 *
 * @param csq The character sequence to take the characters from. As specified by
 *            {@link Appendable}, null is treated as the four characters <code>"null"</code>.
 * @param start The index of the first character to append.
 * @param end The index after the last character to append.
 * @return This StringValue.
 * @throws IndexOutOfBoundsException Thrown, if start or end is negative, start is larger than
 *         end, or end is larger than the length of the sequence.
 * @see java.lang.Appendable#append(java.lang.CharSequence, int, int)
 */
@Override
public Appendable append(CharSequence csq, int start, int end) {
    // Appendable contract: null is appended as the literal text "null"
    final CharSequence source = (csq == null) ? "null" : csq;
    if (start < 0 || start > end || end > source.length()) {
        throw new IndexOutOfBoundsException(
                "start: " + start + " end: " + end + " length: " + source.length());
    }
    final int otherLen = end - start;
    final int oldLen = this.len;
    grow(oldLen + otherLen);
    // The destination index is relative to the old end of this string, not to 'start'. The
    // length is updated only after the loop so that appending a sequence to itself still works.
    for (int i = 0; i < otherLen; i++) {
        this.value[oldLen + i] = source.charAt(start + i);
    }
    this.len = oldLen + otherLen;
    // contents changed, so a previously cached hash is no longer valid
    this.hashCode = 0;
    return this;
}
/**
 * Appends all characters of the given StringValue to this StringValue.
 *
 * @param csq The StringValue to append.
 * @return This StringValue.
 */
public Appendable append(StringValue csq) {
    final int count = csq.length();
    append(csq, 0, count);
    return this;
}
/**
 * Appends the characters <code>start</code> (inclusive) to <code>end</code> (exclusive) of the
 * given StringValue to this StringValue.
 *
 * @param csq The StringValue to take the characters from.
 * @param start The index of the first character to append.
 * @param end The index after the last character to append.
 * @return This StringValue.
 * @throws IndexOutOfBoundsException Thrown, if start is negative, start is larger than end, or
 *         end is larger than the length of the given StringValue.
 */
public Appendable append(StringValue csq, int start, int end) {
    // validate against the logical length; the backing array may hold stale characters beyond it
    if (start < 0 || start > end || end > csq.len) {
        throw new IndexOutOfBoundsException(
                "start: " + start + " end: " + end + " length: " + csq.len);
    }
    final int otherLen = end - start;
    grow(this.len + otherLen);
    // csq.value is read after grow(), so appending a StringValue to itself copies from the
    // enlarged array; arraycopy handles the overlapping case
    System.arraycopy(csq.value, start, this.value, this.len, otherLen);
    this.len += otherLen;
    // contents changed, so a previously cached hash is no longer valid
    this.hashCode = 0;
    return this;
}
// --------------------------------------------------------------------------------------------
// Serialization / De-Serialization
// --------------------------------------------------------------------------------------------
/**
 * Reads the contents of this StringValue from the given input. The data consists of the
 * character count followed by the characters, each written as a variable-length number: seven
 * payload bits per byte, lowest bits first, with the high bit set on every byte except the last.
 *
 * @param in The input to read from.
 * @throws IOException Forwarded from the underlying input.
 */
@Override
public void read(final DataInputView in) throws IOException {
    // decode the character count
    int count = in.readUnsignedByte();
    if (count >= HIGH_BIT) {
        count &= 0x7f;
        int bits = 7;
        int next = in.readUnsignedByte();
        while (next >= HIGH_BIT) {
            count |= (next & 0x7f) << bits;
            bits += 7;
            next = in.readUnsignedByte();
        }
        count |= next << bits;
    }

    this.len = count;
    this.hashCode = 0;
    ensureSize(count);

    // decode the characters
    final char[] target = this.value;
    for (int i = 0; i < count; i++) {
        int code = in.readUnsignedByte();
        if (code >= HIGH_BIT) {
            code &= 0x7f;
            int bits = 7;
            int next = in.readUnsignedByte();
            while (next >= HIGH_BIT) {
                code |= (next & 0x7f) << bits;
                bits += 7;
                next = in.readUnsignedByte();
            }
            code |= next << bits;
        }
        target[i] = (char) code;
    }
}
/**
 * Writes the contents of this StringValue to the given output: first the character count, then
 * every character, each as a variable-length number with seven payload bits per byte, lowest
 * bits first, and the high bit set on every byte except the last.
 *
 * @param out The output to write to.
 * @throws IOException Forwarded from the underlying output.
 */
@Override
public void write(final DataOutputView out) throws IOException {
    final int count = this.len;
    final char[] chars = this.value;

    // character count
    int remaining = count;
    for (; remaining >= HIGH_BIT; remaining >>>= 7) {
        out.write(remaining | HIGH_BIT);
    }
    out.write(remaining);

    // character data
    for (int i = 0; i < count; i++) {
        int code = chars[i];
        for (; code >= HIGH_BIT; code >>>= 7) {
            out.write(code | HIGH_BIT);
        }
        out.write(code);
    }
}
// --------------------------------------------------------------------------------------------
/**
 * Creates a {@link String} from the valid characters of this StringValue.
 *
 * @return A new String with the same contents.
 */
@Override
public String toString() {
    final char[] chars = this.value;
    final int count = this.len;
    return new String(chars, 0, count);
}
/**
 * Compares this StringValue to another one character by character. The first differing
 * character decides; if one string is a prefix of the other, the shorter one is smaller.
 *
 * @param other The StringValue to compare with.
 * @return A negative number, zero, or a positive number, if this StringValue is smaller than,
 *         equal to, or larger than the other one.
 */
@Override
public int compareTo(StringValue other) {
    final int thisLen = this.len;
    final int otherLen = other.len;
    final char[] thisChars = this.value;
    final char[] otherChars = other.value;

    final int common = Math.min(thisLen, otherLen);
    for (int i = 0; i < common; i++) {
        final int diff = thisChars[i] - otherChars[i];
        if (diff != 0) {
            return diff;
        }
    }
    return thisLen - otherLen;
}
/**
 * Computes the hash code over the valid characters as <code>h = 31 * h + c</code> and caches
 * the result. A cached value of zero is treated as "not computed".
 *
 * @return The hash code of the current contents.
 */
@Override
public int hashCode() {
    int hash = this.hashCode;
    if (hash != 0 || this.len <= 0) {
        // either already cached, or an empty string whose hash is zero
        return hash;
    }
    final char[] chars = this.value;
    final int count = this.len;
    for (int i = 0; i < count; i++) {
        hash = 31 * hash + chars[i];
    }
    this.hashCode = hash;
    return hash;
}
/**
 * Checks whether the given object is a StringValue with the same length and the same
 * characters as this one.
 *
 * @param obj The object to compare with.
 * @return True, if the object is a StringValue with equal contents, false otherwise.
 */
@Override
public boolean equals(final Object obj) {
    if (this == obj) {
        return true;
    }
    if (!(obj instanceof StringValue)) {
        return false;
    }
    final StringValue that = (StringValue) obj;
    final int count = this.len;
    if (count != that.len) {
        return false;
    }
    final char[] mine = this.value;
    final char[] theirs = that.value;
    for (int i = 0; i < count; i++) {
        if (mine[i] != theirs[i]) {
            return false;
        }
    }
    return true;
}
// --------------------------------------------------------------------------------------------
// Char Sequence Implementation
// --------------------------------------------------------------------------------------------
/**
 * Returns the number of valid characters in this StringValue.
 *
 * @return The length of the string.
 */
@Override
public int length() {
    final int count = this.len;
    return count;
}
/**
 * Returns the character at the given position.
 *
 * @param index The position of the character.
 * @return The character at that position.
 * @throws IndexOutOfBoundsException Thrown, if the index is at or beyond the length of the string.
 */
@Override
public char charAt(int index) {
    if (index >= len) {
        throw new IndexOutOfBoundsException();
    }
    return this.value[index];
}
/**
 * Returns a new StringValue holding the characters from index <code>start</code> (inclusive)
 * to index <code>end</code> (exclusive) of this string.
 *
 * @param start The beginning index, inclusive.
 * @param end The ending index, exclusive.
 * @return The sub-sequence as a new StringValue.
 */
@Override
public CharSequence subSequence(int start, int end) {
    final int count = end - start;
    return new StringValue(this, start, count);
}
// --------------------------------------------------------------------------------------------
// Normalized Key
// --------------------------------------------------------------------------------------------
/**
 * Reports the maximal normalized key length as {@link Integer#MAX_VALUE}.
 *
 * @return Always <code>Integer.MAX_VALUE</code>.
 */
@Override
public int getMaxNormalizedKeyLen() {
    final int unbounded = Integer.MAX_VALUE;
    return unbounded;
}
/**
 * Writes exactly <code>len</code> bytes into the target segment, starting at <code>offset</code>.
 * Characters are emitted front to back in a one-, two- or three-byte form depending on their
 * value. Writing stops as soon as <code>len</code> bytes are filled, even in the middle of a
 * character; if the string is exhausted first, the remaining bytes are set to zero.
 *
 * @param target The memory segment to write to.
 * @param offset The position in the segment at which the first byte is written.
 * @param len The number of bytes to write.
 */
@Override
public void copyNormalizedKey(MemorySegment target, int offset, int len) {
// cache variables on stack, avoid repeated dereferencing of "this"
final char[] chars = this.value;
final int limit = offset + len;
final int end = this.len;
int pos = 0;
while (pos < end && offset < limit) {
char c = chars[pos++];
if (c < HIGH_BIT) {
// below 0x80: a single byte holding the character itself
target.put(offset++, (byte) c);
}
else if (c < HIGH_BIT2) {
// 0x80 to 0x1FFF: first byte is the bits above bit 6 with the high bit set,
// second byte is the low eight bits of the character
target.put(offset++, (byte) ((c >>> 7) | HIGH_BIT));
if (offset < limit) {
target.put(offset++, (byte) c);
}
}
else {
// 0x2000 and above: first byte is the top six bits with the two highest bits set,
// followed by bits 9..2 and then the low eight bits of the character
target.put(offset++, (byte) ((c >>> 10) | HIGH_BIT2_MASK));
if (offset < limit) {
target.put(offset++, (byte) (c >>> 2));
}
if (offset < limit) {
target.put(offset++, (byte) c);
}
}
}
// pad the rest of the key with zero bytes
while (offset < limit) {
target.put(offset++, (byte) 0);
}
}
// --------------------------------------------------------------------------------------------
/**
 * Reports the binary length of this type as <code>-1</code>.
 * NOTE(review): presumably -1 marks a variable-length serialized form; confirm against CopyableValue.
 *
 * @return Always <code>-1</code>.
 */
@Override
public int getBinaryLength() {
    final int binaryLength = -1;
    return binaryLength;
}
/**
 * Copies the contents of this StringValue, including the cached hash code, into the target.
 *
 * @param target The StringValue that receives the copy.
 */
@Override
public void copyTo(StringValue target) {
    final int count = this.len;
    target.ensureSize(count);
    System.arraycopy(this.value, 0, target.value, 0, count);
    target.hashCode = this.hashCode;
    target.len = count;
}
/**
 * Creates a new StringValue with the same contents as this one.
 *
 * @return The copy.
 */
@Override
public StringValue copy() {
    final StringValue duplicate = new StringValue(this);
    return duplicate;
}
/**
 * Copies one serialized StringValue from the input to the output without materializing it.
 * The character count is decoded while its bytes are forwarded; afterwards the bytes of that
 * many variable-length encoded characters are forwarded one by one.
 *
 * @param in The input to read the serialized value from.
 * @param target The output to copy the bytes to.
 * @throws IOException Forwarded from the underlying input or output.
 */
@Override
public void copy(DataInputView in, DataOutputView target) throws IOException {
    // forward and decode the character count
    int count = in.readUnsignedByte();
    target.writeByte(count);
    if (count >= HIGH_BIT) {
        count &= 0x7f;
        int bits = 7;
        int next;
        do {
            next = in.readUnsignedByte();
            target.writeByte(next);
            if (next >= HIGH_BIT) {
                count |= (next & 0x7f) << bits;
                bits += 7;
            }
        } while (next >= HIGH_BIT);
        count |= next << bits;
    }

    // forward the characters; a byte without the high bit ends a character
    for (int i = 0; i < count; i++) {
        int current;
        do {
            current = in.readUnsignedByte();
            target.writeByte(current);
        } while (current >= HIGH_BIT);
    }
}
// --------------------------------------------------------------------------------------------
// Utilities
// --------------------------------------------------------------------------------------------
/**
 * Makes sure the backing array can hold at least the given number of characters. If the array
 * is too small it is replaced by a new one of exactly that size; existing content is NOT retained.
 */
private void ensureSize(int size) {
    if (size > this.value.length) {
        this.value = new char[size];
    }
}
/**
 * Makes sure the backing array can hold at least the given number of characters and keeps the
 * current content. A too-small array is replaced by one of 1.5 times the old capacity, or of
 * the requested size if that is larger.
 */
private void grow(int size) {
    final char[] current = this.value;
    if (current.length >= size) {
        return;
    }
    final int capacity = Math.max(current.length * 3 / 2, size);
    final char[] enlarged = new char[capacity];
    System.arraycopy(current, 0, enlarged, 0, this.len);
    this.value = enlarged;
}
// --------------------------------------------------------------------------------------------
// Static Helpers for String Serialization
// --------------------------------------------------------------------------------------------
/**
 * Reads a string from the given input. The stored length is the real length plus one, so that
 * a stored length of zero can represent a null string. Length and characters are variable-length
 * encoded: seven payload bits per byte, lowest bits first, high bit set on all but the last byte.
 *
 * @param in The input to read from.
 * @return The string, or null, if a null string was stored.
 * @throws IOException Forwarded from the underlying input.
 */
public static String readString(DataInput in) throws IOException {
    int storedLen = in.readUnsignedByte();
    if (storedLen == 0) {
        // a stored length of zero marks a null value
        return null;
    }
    if (storedLen >= HIGH_BIT) {
        storedLen &= 0x7f;
        int bits = 7;
        int next = in.readUnsignedByte();
        while (next >= HIGH_BIT) {
            storedLen |= (next & 0x7f) << bits;
            bits += 7;
            next = in.readUnsignedByte();
        }
        storedLen |= next << bits;
    }

    // the stored length is offset by one because of the null marker
    final int count = storedLen - 1;
    final char[] chars = new char[count];
    for (int i = 0; i < count; i++) {
        int code = in.readUnsignedByte();
        if (code >= HIGH_BIT) {
            code &= 0x7f;
            int bits = 7;
            int next = in.readUnsignedByte();
            while (next >= HIGH_BIT) {
                code |= (next & 0x7f) << bits;
                bits += 7;
                next = in.readUnsignedByte();
            }
            code |= next << bits;
        }
        chars[i] = (char) code;
    }
    return new String(chars, 0, count);
}
/**
 * Writes a character sequence to the given output. A null sequence is written as a single zero
 * byte; otherwise the length plus one is written, followed by the characters. Length and
 * characters are variable-length encoded: seven payload bits per byte, lowest bits first, high
 * bit set on all but the last byte.
 *
 * @param cs The character sequence to write, may be null.
 * @param out The output to write to.
 * @throws IOException Forwarded from the underlying output.
 * @throws IllegalArgumentException Thrown, if the length plus one overflows an int.
 */
public static final void writeString(CharSequence cs, DataOutput out) throws IOException {
    if (cs == null) {
        // a length of zero marks the null value
        out.write(0);
        return;
    }

    // the written length is offset by one, because zero is reserved for null
    int storedLen = cs.length() + 1;
    if (storedLen < 0) {
        throw new IllegalArgumentException("CharSequence is too long.");
    }
    for (; storedLen >= HIGH_BIT; storedLen >>>= 7) {
        out.write(storedLen | HIGH_BIT);
    }
    out.write(storedLen);

    // character data
    for (int i = 0; i < cs.length(); i++) {
        int code = cs.charAt(i);
        for (; code >= HIGH_BIT; code >>>= 7) {
            out.write(code | HIGH_BIT);
        }
        out.write(code);
    }
}
/**
 * Copies one serialized string, as written by <code>writeString</code>, from the input to the
 * output without materializing it. The stored length is decoded while its bytes are forwarded;
 * afterwards the bytes of the characters are forwarded one by one.
 *
 * @param in The input to read the serialized string from.
 * @param out The output to copy the bytes to.
 * @throws IOException Forwarded from the underlying input or output.
 */
public static final void copyString(DataInput in, DataOutput out) throws IOException {
    // forward and decode the stored length
    int storedLen = in.readUnsignedByte();
    out.writeByte(storedLen);
    if (storedLen >= HIGH_BIT) {
        storedLen &= 0x7f;
        int bits = 7;
        int next;
        do {
            next = in.readUnsignedByte();
            out.writeByte(next);
            if (next >= HIGH_BIT) {
                storedLen |= (next & 0x7f) << bits;
                bits += 7;
            }
        } while (next >= HIGH_BIT);
        storedLen |= next << bits;
    }

    // the stored length is one larger than the number of characters (zero marks a null string)
    final int count = storedLen - 1;
    for (int i = 0; i < count; i++) {
        int current;
        do {
            current = in.readUnsignedByte();
            out.writeByte(current);
        } while (current >= HIGH_BIT);
    }
}
}