AVRO-2905: Fix Utf8 hash cache (#955)
* AVRO-2905: Fix Utf8 hash cache
* AVRO-2905: Address review comment, add length check
diff --git a/lang/java/avro/src/main/java/org/apache/avro/util/Utf8.java b/lang/java/avro/src/main/java/org/apache/avro/util/Utf8.java
index 4a8f2ae..879a897 100644
--- a/lang/java/avro/src/main/java/org/apache/avro/util/Utf8.java
+++ b/lang/java/avro/src/main/java/org/apache/avro/util/Utf8.java
@@ -47,18 +47,21 @@
MAX_LENGTH = i;
}
- private byte[] bytes = EMPTY;
- private int hash = 0;
- private boolean hasHash = false;
+ private byte[] bytes;
+ private int hash;
private int length;
private String string;
public Utf8() {
+ bytes = EMPTY;
}
public Utf8(String string) {
- this.bytes = getBytesFor(string);
- this.length = bytes.length;
+ byte[] bytes = getBytesFor(string);
+ int length = bytes.length;
+ checkLength(length);
+ this.bytes = bytes;
+ this.length = length;
this.string = string;
}
@@ -66,11 +69,14 @@
this.length = other.length;
this.bytes = Arrays.copyOf(other.bytes, other.length);
this.string = other.string;
+ this.hash = other.hash;
}
public Utf8(byte[] bytes) {
+ int length = bytes.length;
+ checkLength(length);
this.bytes = bytes;
- this.length = bytes.length;
+ this.length = length;
}
/**
@@ -111,24 +117,25 @@
* length does not change, as this also clears the cached String.
*/
public Utf8 setByteLength(int newLength) {
- if (newLength > MAX_LENGTH) {
- throw new AvroRuntimeException("String length " + newLength + " exceeds maximum allowed");
- }
+ checkLength(newLength);
if (this.bytes.length < newLength) {
this.bytes = Arrays.copyOf(this.bytes, newLength);
}
this.length = newLength;
this.string = null;
- this.hasHash = false;
+ this.hash = 0;
return this;
}
/** Set to the contents of a String. */
public Utf8 set(String string) {
- this.bytes = getBytesFor(string);
- this.length = bytes.length;
+ byte[] bytes = getBytesFor(string);
+ int length = bytes.length;
+ checkLength(length);
+ this.bytes = bytes;
+ this.length = length;
this.string = string;
- this.hasHash = false;
+ this.hash = 0;
return this;
}
@@ -140,7 +147,6 @@
System.arraycopy(other.bytes, 0, bytes, 0, length);
this.string = other.string;
this.hash = other.hash;
- this.hasHash = other.hasHash;
return this;
}
@@ -172,13 +178,16 @@
@Override
public int hashCode() {
- if (!hasHash) {
+ int h = hash;
+ if (h == 0) {
+ byte[] bytes = this.bytes;
+ int length = this.length;
for (int i = 0; i < length; i++) {
- hash = hash * 31 + bytes[i];
+ h = h * 31 + bytes[i];
}
- hasHash = true;
+ this.hash = h;
}
- return hash;
+ return h;
}
@Override
@@ -202,9 +211,14 @@
return toString().subSequence(start, end);
}
+ private static void checkLength(int length) {
+ if (length > MAX_LENGTH) {
+ throw new AvroRuntimeException("String length " + length + " exceeds maximum allowed");
+ }
+ }
+
/** Gets the UTF-8 bytes for a String */
public static byte[] getBytesFor(String str) {
return str.getBytes(StandardCharsets.UTF_8);
}
-
}
diff --git a/lang/java/avro/src/test/java/org/apache/avro/util/TestUtf8.java b/lang/java/avro/src/test/java/org/apache/avro/util/TestUtf8.java
index 60c8f71..e62982b 100644
--- a/lang/java/avro/src/test/java/org/apache/avro/util/TestUtf8.java
+++ b/lang/java/avro/src/test/java/org/apache/avro/util/TestUtf8.java
@@ -17,6 +17,7 @@
*/
package org.apache.avro.util;
+import static org.junit.Assert.assertNotEquals;
import static org.junit.Assert.assertSame;
import static org.junit.Assert.assertEquals;
@@ -51,13 +52,42 @@
@Test
public void testHashCodeReused() {
+ assertEquals(97, new Utf8("a").hashCode());
+ assertEquals(3904, new Utf8("zz").hashCode());
+ assertEquals(122, new Utf8("z").hashCode());
+ assertEquals(99162322, new Utf8("hello").hashCode());
+ assertEquals(3198781, new Utf8("hell").hashCode());
+
Utf8 u = new Utf8("a");
assertEquals(97, u.hashCode());
+ assertEquals(97, u.hashCode());
+ u.set("a");
+ assertEquals(97, u.hashCode());
+
+ u.setByteLength(1);
+ assertEquals(97, u.hashCode());
u.setByteLength(2);
- u.set("zz");
+ assertNotEquals(97, u.hashCode());
- assertEquals(97121, u.hashCode());
- assertEquals(97121, u.hashCode());
+ u.set("zz");
+ assertEquals(3904, u.hashCode());
+ u.setByteLength(1);
+ assertEquals(122, u.hashCode());
+
+ u.set("hello");
+ assertEquals(99162322, u.hashCode());
+ u.setByteLength(4);
+ assertEquals(3198781, u.hashCode());
+
+ u.set(new Utf8("zz"));
+ assertEquals(3904, u.hashCode());
+ u.setByteLength(1);
+ assertEquals(122, u.hashCode());
+
+ u.set(new Utf8("hello"));
+ assertEquals(99162322, u.hashCode());
+ u.setByteLength(4);
+ assertEquals(3198781, u.hashCode());
}
}