AVRO-2905: Fix Utf8 hash cache (#955)

* AVRO-2905: Fix Utf8 hash cache

* AVRO-2905: Address review comment, add length check
diff --git a/lang/java/avro/src/main/java/org/apache/avro/util/Utf8.java b/lang/java/avro/src/main/java/org/apache/avro/util/Utf8.java
index 4a8f2ae..879a897 100644
--- a/lang/java/avro/src/main/java/org/apache/avro/util/Utf8.java
+++ b/lang/java/avro/src/main/java/org/apache/avro/util/Utf8.java
@@ -47,18 +47,21 @@
     MAX_LENGTH = i;
   }
 
-  private byte[] bytes = EMPTY;
-  private int hash = 0;
-  private boolean hasHash = false;
+  private byte[] bytes;
+  private int hash;
   private int length;
   private String string;
 
   public Utf8() {
+    bytes = EMPTY;
   }
 
   public Utf8(String string) {
-    this.bytes = getBytesFor(string);
-    this.length = bytes.length;
+    byte[] bytes = getBytesFor(string);
+    int length = bytes.length;
+    checkLength(length);
+    this.bytes = bytes;
+    this.length = length;
     this.string = string;
   }
 
@@ -66,11 +69,14 @@
     this.length = other.length;
     this.bytes = Arrays.copyOf(other.bytes, other.length);
     this.string = other.string;
+    this.hash = other.hash;
   }
 
   public Utf8(byte[] bytes) {
+    int length = bytes.length;
+    checkLength(length);
     this.bytes = bytes;
-    this.length = bytes.length;
+    this.length = length;
   }
 
   /**
@@ -111,24 +117,25 @@
    * length does not change, as this also clears the cached String.
    */
   public Utf8 setByteLength(int newLength) {
-    if (newLength > MAX_LENGTH) {
-      throw new AvroRuntimeException("String length " + newLength + " exceeds maximum allowed");
-    }
+    checkLength(newLength);
     if (this.bytes.length < newLength) {
       this.bytes = Arrays.copyOf(this.bytes, newLength);
     }
     this.length = newLength;
     this.string = null;
-    this.hasHash = false;
+    this.hash = 0;
     return this;
   }
 
   /** Set to the contents of a String. */
   public Utf8 set(String string) {
-    this.bytes = getBytesFor(string);
-    this.length = bytes.length;
+    byte[] bytes = getBytesFor(string);
+    int length = bytes.length;
+    checkLength(length);
+    this.bytes = bytes;
+    this.length = length;
     this.string = string;
-    this.hasHash = false;
+    this.hash = 0;
     return this;
   }
 
@@ -140,7 +147,6 @@
     System.arraycopy(other.bytes, 0, bytes, 0, length);
     this.string = other.string;
     this.hash = other.hash;
-    this.hasHash = other.hasHash;
     return this;
   }
 
@@ -172,13 +178,16 @@
 
   @Override
   public int hashCode() {
-    if (!hasHash) {
+    int h = hash;
+    if (h == 0) {
+      byte[] bytes = this.bytes;
+      int length = this.length;
       for (int i = 0; i < length; i++) {
-        hash = hash * 31 + bytes[i];
+        h = h * 31 + bytes[i];
       }
-      hasHash = true;
+      this.hash = h;
     }
-    return hash;
+    return h;
   }
 
   @Override
@@ -202,9 +211,14 @@
     return toString().subSequence(start, end);
   }
 
+  private static void checkLength(int length) {
+    if (length > MAX_LENGTH) {
+      throw new AvroRuntimeException("String length " + length + " exceeds maximum allowed");
+    }
+  }
+
   /** Gets the UTF-8 bytes for a String */
   public static byte[] getBytesFor(String str) {
     return str.getBytes(StandardCharsets.UTF_8);
   }
-
 }
diff --git a/lang/java/avro/src/test/java/org/apache/avro/util/TestUtf8.java b/lang/java/avro/src/test/java/org/apache/avro/util/TestUtf8.java
index 60c8f71..e62982b 100644
--- a/lang/java/avro/src/test/java/org/apache/avro/util/TestUtf8.java
+++ b/lang/java/avro/src/test/java/org/apache/avro/util/TestUtf8.java
@@ -17,6 +17,7 @@
  */
 package org.apache.avro.util;
 
+import static org.junit.Assert.assertNotEquals;
 import static org.junit.Assert.assertSame;
 import static org.junit.Assert.assertEquals;
 
@@ -51,13 +52,42 @@
 
   @Test
   public void testHashCodeReused() {
+    assertEquals(97, new Utf8("a").hashCode());
+    assertEquals(3904, new Utf8("zz").hashCode());
+    assertEquals(122, new Utf8("z").hashCode());
+    assertEquals(99162322, new Utf8("hello").hashCode());
+    assertEquals(3198781, new Utf8("hell").hashCode());
+
     Utf8 u = new Utf8("a");
     assertEquals(97, u.hashCode());
+    assertEquals(97, u.hashCode());
 
+    u.set("a");
+    assertEquals(97, u.hashCode());
+
+    u.setByteLength(1);
+    assertEquals(97, u.hashCode());
     u.setByteLength(2);
-    u.set("zz");
+    assertNotEquals(97, u.hashCode());
 
-    assertEquals(97121, u.hashCode());
-    assertEquals(97121, u.hashCode());
+    u.set("zz");
+    assertEquals(3904, u.hashCode());
+    u.setByteLength(1);
+    assertEquals(122, u.hashCode());
+
+    u.set("hello");
+    assertEquals(99162322, u.hashCode());
+    u.setByteLength(4);
+    assertEquals(3198781, u.hashCode());
+
+    u.set(new Utf8("zz"));
+    assertEquals(3904, u.hashCode());
+    u.setByteLength(1);
+    assertEquals(122, u.hashCode());
+
+    u.set(new Utf8("hello"));
+    assertEquals(99162322, u.hashCode());
+    u.setByteLength(4);
+    assertEquals(3198781, u.hashCode());
   }
 }