Move String intern code and make it generic (#1925)
Create an Interner class that implements a deduplicating intern
function for any object type. Add corresponding unit test, and
convert TabletLocator's custom WeakHashMap over to use this
instead.
diff --git a/core/src/main/java/org/apache/accumulo/core/clientImpl/TabletLocator.java b/core/src/main/java/org/apache/accumulo/core/clientImpl/TabletLocator.java
index 5bfb64d..9494dd5 100644
--- a/core/src/main/java/org/apache/accumulo/core/clientImpl/TabletLocator.java
+++ b/core/src/main/java/org/apache/accumulo/core/clientImpl/TabletLocator.java
@@ -20,13 +20,11 @@
import static com.google.common.base.Preconditions.checkArgument;
-import java.lang.ref.WeakReference;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
-import java.util.WeakHashMap;
import org.apache.accumulo.core.client.AccumuloException;
import org.apache.accumulo.core.client.AccumuloSecurityException;
@@ -40,6 +38,7 @@
import org.apache.accumulo.core.metadata.RootTable;
import org.apache.accumulo.core.singletons.SingletonManager;
import org.apache.accumulo.core.singletons.SingletonService;
+import org.apache.accumulo.core.util.Interner;
import org.apache.hadoop.io.Text;
import com.google.common.base.Preconditions;
@@ -195,22 +194,7 @@
}
public static class TabletLocation implements Comparable<TabletLocation> {
- private static final WeakHashMap<String,WeakReference<String>> tabletLocs = new WeakHashMap<>();
-
- private static String dedupeLocation(String tabletLoc) {
- synchronized (tabletLocs) {
- WeakReference<String> lref = tabletLocs.get(tabletLoc);
- if (lref != null) {
- String loc = lref.get();
- if (loc != null) {
- return loc;
- }
- }
-
- tabletLocs.put(tabletLoc, new WeakReference<>(tabletLoc));
- return tabletLoc;
- }
- }
+ private static final Interner<String> interner = new Interner<>();
public final KeyExtent tablet_extent;
public final String tablet_location;
@@ -221,8 +205,8 @@
checkArgument(tablet_location != null, "tablet_location is null");
checkArgument(session != null, "session is null");
this.tablet_extent = tablet_extent;
- this.tablet_location = dedupeLocation(tablet_location);
- this.tablet_session = dedupeLocation(session);
+ this.tablet_location = interner.intern(tablet_location);
+ this.tablet_session = interner.intern(session);
}
@Override
diff --git a/core/src/main/java/org/apache/accumulo/core/util/Interner.java b/core/src/main/java/org/apache/accumulo/core/util/Interner.java
new file mode 100644
index 0000000..502adbe
--- /dev/null
+++ b/core/src/main/java/org/apache/accumulo/core/util/Interner.java
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.accumulo.core.util;
+
+import java.lang.ref.WeakReference;
+import java.util.WeakHashMap;
+
+/**
+ * A utility that mimics String.intern() for any immutable object type (including String).
+ */
+public class Interner<T> {
+ private final WeakHashMap<T,WeakReference<T>> internTable = new WeakHashMap<>();
+
+ public synchronized T intern(T item) {
+ WeakReference<T> ref = internTable.get(item);
+ if (ref != null) {
+ T oldItem = ref.get();
+ if (oldItem != null) {
+ return oldItem;
+ }
+ }
+ internTable.put(item, new WeakReference<>(item));
+ return item;
+ }
+
+ // for testing
+ synchronized int size() {
+ return internTable.size();
+ }
+}
diff --git a/core/src/test/java/org/apache/accumulo/core/util/InternerTest.java b/core/src/test/java/org/apache/accumulo/core/util/InternerTest.java
new file mode 100644
index 0000000..6dd808f
--- /dev/null
+++ b/core/src/test/java/org/apache/accumulo/core/util/InternerTest.java
@@ -0,0 +1,110 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.accumulo.core.util;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotEquals;
+import static org.junit.Assert.assertNotSame;
+import static org.junit.Assert.assertSame;
+
+import org.junit.Test;
+
+public class InternerTest {
+
+ private class TestObj {
+ private final int id;
+
+ TestObj(int id) {
+ this.id = id;
+ }
+
+ @Override
+ public int hashCode() {
+ return id;
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if (obj instanceof TestObj) {
+ return ((TestObj) obj).id == this.id;
+ }
+ return false;
+ }
+ }
+
+ @Test
+ public void testInternDedupes() {
+ var interner = new Interner<TestObj>();
+
+ var obj1 = new TestObj(1);
+ var obj1_dupe = new TestObj(1);
+ assertSame(obj1, obj1);
+ assertNotSame(obj1, obj1_dupe);
+ assertEquals(obj1, obj1_dupe);
+ assertEquals(obj1.hashCode(), obj1_dupe.hashCode());
+
+ // verify object gets added to the intern pool
+ assertSame(obj1, interner.intern(obj1));
+ assertEquals(1, interner.size());
+
+ // verify equivalent, but not the same object, gets deduplicated
+ assertSame(obj1, interner.intern(obj1_dupe));
+ assertEquals(1, interner.size());
+
+ // verify second object grows the intern pool size
+ var obj2 = new TestObj(2);
+ assertNotSame(obj1, obj2);
+ assertNotEquals(obj1, obj2);
+ var intern2 = interner.intern(obj2);
+ assertEquals(2, interner.size());
+
+ // sanity check to ensure we got the same object back for obj2, and it's not mangled with obj1
+ assertSame(obj2, intern2);
+ assertNotEquals(obj1, intern2);
+ }
+
+ @Test(timeout = 10_000)
+ public void testInternsGetGarbageCollected() {
+ var interner = new Interner<TestObj>();
+ assertEquals(0, interner.size()); // ensure empty
+
+ // add one and keep a strong reference
+ var obj1 = interner.intern(new TestObj(1));
+ assertEquals(1, interner.size());
+
+ // try to add a second, weakly referenced object until it sticks (may be GC'd between checks)
+ do {
+ interner.intern(new TestObj(2));
+ } while (interner.size() != 2);
+
+ // best effort to GC until the weakly reachable object goes away or until test times out
+ do {
+ System.gc();
+ } while (interner.size() != 1);
+
+ // ensure obj1 is still interned (because we kept a strong reference)
+ assertSame(obj1, interner.intern(new TestObj(1)));
+
+ // ensure second test object is entirely new (previous ones should have been GC'd)
+ var obj2 = new TestObj(2);
+ assertSame(obj2, interner.intern(obj2));
+ assertEquals(2, interner.size());
+ }
+
+}