/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.avro.util;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import org.apache.avro.AvroRuntimeException;
import org.apache.avro.io.BinaryData;
import org.slf4j.LoggerFactory;
/**
 * A Utf8 string. Unlike {@link String}, instances are mutable. This is more
 * efficient than {@link String} when reading or writing a sequence of values,
 * as a single instance may be reused.
 * <p>
 * The backing array may be longer than the logical content; only the first
 * {@link #getByteLength()} bytes are meaningful. The decoded {@link String} and
 * the hash code are cached lazily and refreshed by the setters.
 */
public class Utf8 implements Comparable<Utf8>, CharSequence {
  /** Name of the system property that caps the accepted byte length. */
  private static final String MAX_LENGTH_PROPERTY = "org.apache.avro.limits.string.maxLength";
  /**
   * Largest accepted byte length. {@link Integer#MAX_VALUE} unless the system
   * property supplies a usable value.
   */
  private static final int MAX_LENGTH;
  /** Shared zero-length backing array used by the no-arg constructor. */
  private static final byte[] EMPTY = new byte[0];

  static {
    String o = System.getProperty(MAX_LENGTH_PROPERTY);
    int i = Integer.MAX_VALUE;
    if (o != null) {
      try {
        int parsed = Integer.parseUnsignedInt(o);
        // parseUnsignedInt accepts values up to 2^32-1 and returns those above
        // Integer.MAX_VALUE as negative ints. A negative limit would make
        // checkLength reject every length, so such values keep the default
        // (no array can be longer than Integer.MAX_VALUE anyway).
        if (parsed >= 0) {
          i = parsed;
        }
      } catch (NumberFormatException nfe) {
        LoggerFactory.getLogger(Utf8.class).warn("Could not parse property " + MAX_LENGTH_PROPERTY + ": " + o, nfe);
      }
    }
    MAX_LENGTH = i;
  }

  /** Backing storage; only the first {@code length} bytes are valid. */
  private byte[] bytes;
  /** Cached hash code; 0 means "not computed yet". */
  private int hash;
  /** Number of valid bytes in {@code bytes}. */
  private int length;
  /** Cached decoded form, or {@code null} when it has not been decoded. */
  private String string;

  /** Creates an empty instance. */
  public Utf8() {
    bytes = EMPTY;
  }

  /**
   * Creates an instance holding the UTF-8 encoding of {@code string}. The given
   * String is kept as the cached decoded form.
   *
   * @throws AvroRuntimeException if the encoded length exceeds the configured
   *                              maximum
   */
  public Utf8(String string) {
    byte[] bytes = getBytesFor(string);
    int length = bytes.length;
    checkLength(length);
    this.bytes = bytes;
    this.length = length;
    this.string = string;
  }

  /**
   * Creates a copy of {@code other}. The valid bytes are copied into a new
   * array; the cached String and hash are carried over.
   */
  public Utf8(Utf8 other) {
    this.length = other.length;
    this.bytes = Arrays.copyOf(other.bytes, other.length);
    this.string = other.string;
    this.hash = other.hash;
  }

  /**
   * Creates an instance backed by {@code bytes}. The array is stored as-is, not
   * copied, and its full length is taken as the content length.
   *
   * @throws AvroRuntimeException if the array length exceeds the configured
   *                              maximum
   */
  public Utf8(byte[] bytes) {
    int length = bytes.length;
    checkLength(length);
    this.bytes = bytes;
    this.length = length;
  }

  /**
   * Return UTF-8 encoded bytes. Only valid through {@link #getByteLength()}.
   * This is the backing array itself, not a copy.
   */
  public byte[] getBytes() {
    return bytes;
  }

  /**
   * Return length in bytes.
   *
   * @deprecated call {@link #getByteLength()} instead.
   */
  @Deprecated
  public int getLength() {
    return length;
  }

  /** Return length in bytes. */
  public int getByteLength() {
    return length;
  }

  /**
   * Set length in bytes. Should be called whenever byte content changes, even
   * if the length does not change, as this also clears the cached String.
   *
   * @deprecated call {@link #setByteLength(int)} instead.
   */
  @Deprecated
  public Utf8 setLength(int newLength) {
    return setByteLength(newLength);
  }

  /**
   * Set length in bytes. Should be called whenever byte content changes, even
   * if the length does not change, as this also clears the cached String and
   * hash. The backing array is grown (preserving existing bytes) when it is
   * shorter than {@code newLength}; it is never shrunk.
   *
   * @return this instance
   * @throws AvroRuntimeException if {@code newLength} is negative or exceeds
   *                              the configured maximum
   */
  public Utf8 setByteLength(int newLength) {
    checkLength(newLength);
    if (this.bytes.length < newLength) {
      this.bytes = Arrays.copyOf(this.bytes, newLength);
    }
    this.length = newLength;
    this.string = null;
    this.hash = 0;
    return this;
  }

  /**
   * Set to the contents of a String. The backing array is replaced by the
   * String's UTF-8 encoding and the String is kept as the cached decoded form.
   *
   * @return this instance
   * @throws AvroRuntimeException if the encoded length exceeds the configured
   *                              maximum
   */
  public Utf8 set(String string) {
    byte[] bytes = getBytesFor(string);
    int length = bytes.length;
    checkLength(length);
    this.bytes = bytes;
    this.length = length;
    this.string = string;
    this.hash = 0;
    return this;
  }

  /**
   * Set to the contents of another instance. The valid bytes of {@code other}
   * are copied into this instance's backing array, which is reallocated only
   * when it is too small. The cached String and hash are carried over.
   *
   * @return this instance
   */
  public Utf8 set(Utf8 other) {
    if (this.bytes.length < other.length) {
      this.bytes = new byte[other.length];
    }
    this.length = other.length;
    System.arraycopy(other.bytes, 0, bytes, 0, length);
    this.string = other.string;
    this.hash = other.hash;
    return this;
  }

  /**
   * Decodes the valid bytes as UTF-8. The result is cached until the content is
   * changed through one of the setters; an empty instance always yields
   * {@code ""}.
   */
  @Override
  public String toString() {
    if (this.length == 0) {
      return "";
    }
    if (this.string == null) {
      this.string = new String(bytes, 0, length, StandardCharsets.UTF_8);
    }
    return this.string;
  }

  /**
   * Two instances are equal when they have the same byte length and identical
   * bytes over that length. Bytes beyond the length are ignored.
   */
  @Override
  public boolean equals(Object o) {
    if (o == this) {
      return true;
    }
    if (!(o instanceof Utf8)) {
      return false;
    }
    Utf8 that = (Utf8) o;
    if (this.length != that.length) {
      return false;
    }
    byte[] thatBytes = that.bytes;
    for (int i = 0; i < this.length; i++) {
      if (bytes[i] != thatBytes[i]) {
        return false;
      }
    }
    return true;
  }

  /**
   * Hash over the valid bytes ({@code h = h * 31 + b}). The value is cached; a
   * computed hash of 0 is indistinguishable from "not cached" and is simply
   * recomputed on the next call.
   */
  @Override
  public int hashCode() {
    int h = hash;
    if (h == 0) {
      byte[] bytes = this.bytes;
      int length = this.length;
      for (int i = 0; i < length; i++) {
        h = h * 31 + bytes[i];
      }
      this.hash = h;
    }
    return h;
  }

  /**
   * Orders instances by their valid bytes, delegating to
   * {@link BinaryData#compareBytes(byte[], int, int, byte[], int, int)}.
   */
  @Override
  public int compareTo(Utf8 that) {
    return BinaryData.compareBytes(this.bytes, 0, this.length, that.bytes, 0, that.length);
  }

  // CharSequence implementation. These operate on the decoded String, so
  // indices and lengths are in chars, not bytes.

  @Override
  public char charAt(int index) {
    return toString().charAt(index);
  }

  @Override
  public int length() {
    return toString().length();
  }

  @Override
  public CharSequence subSequence(int start, int end) {
    return toString().subSequence(start, end);
  }

  /**
   * Validates a byte length before it is stored.
   *
   * @throws AvroRuntimeException if {@code length} is negative or greater than
   *                              the configured maximum
   */
  private static void checkLength(int length) {
    // A negative length would otherwise be stored silently and only surface
    // later as an unrelated index error when the bytes are decoded.
    if (length < 0) {
      throw new AvroRuntimeException("Malformed data. Length is negative: " + length);
    }
    if (length > MAX_LENGTH) {
      throw new AvroRuntimeException("String length " + length + " exceeds maximum allowed");
    }
  }

  /** Gets the UTF-8 bytes for a String */
  public static byte[] getBytesFor(String str) {
    return str.getBytes(StandardCharsets.UTF_8);
  }
}