InDimFilter: Fix cache key computation to avoid collisions. (#11168)
The prior code did not include separation between values, and encoded
null ambiguously. This patch fixes both of those issues by encoding
strings as length + value instead of just value.
I think cache key computation was OK prior to #9800. Prior to that
patch, the cache key was computed using CacheKeyBuilder.appendStrings,
which encodes strings as UTF-8 and inserts a separator byte (0xff)
between them that cannot appear in a UTF-8 stream.
diff --git a/processing/src/main/java/org/apache/druid/query/filter/InDimFilter.java b/processing/src/main/java/org/apache/druid/query/filter/InDimFilter.java
index 577e255..adba7f8 100644
--- a/processing/src/main/java/org/apache/druid/query/filter/InDimFilter.java
+++ b/processing/src/main/java/org/apache/druid/query/filter/InDimFilter.java
@@ -432,14 +432,18 @@
sortedValues = sortedValuesList;
}
+ // Hash all values, in sorted order, as their length followed by their content.
final Hasher hasher = Hashing.sha256().newHasher();
for (String v : sortedValues) {
if (v == null) {
- hasher.putInt(0);
+ // Encode null as length -1, no content.
+ hasher.putInt(-1);
} else {
+ hasher.putInt(v.length());
hasher.putString(v, StandardCharsets.UTF_8);
}
}
+
return new CacheKeyBuilder(DimFilterUtils.IN_CACHE_ID)
.appendString(dimension)
.appendByte(DimFilterUtils.STRING_SEPARATOR)
diff --git a/processing/src/test/java/org/apache/druid/query/filter/InDimFilterTest.java b/processing/src/test/java/org/apache/druid/query/filter/InDimFilterTest.java
index 3bc53f6..25c27d4 100644
--- a/processing/src/test/java/org/apache/druid/query/filter/InDimFilterTest.java
+++ b/processing/src/test/java/org/apache/druid/query/filter/InDimFilterTest.java
@@ -98,6 +98,19 @@
}
@Test
+ public void testGetCacheKeyForNullVsEmptyString()
+ {
+ final InDimFilter inDimFilter1 = new InDimFilter("dimTest", Arrays.asList(null, "abc"), null);
+ final InDimFilter inDimFilter2 = new InDimFilter("dimTest", Arrays.asList("", "abc"), null);
+
+ if (NullHandling.sqlCompatible()) {
+ Assert.assertFalse(Arrays.equals(inDimFilter1.getCacheKey(), inDimFilter2.getCacheKey()));
+ } else {
+ Assert.assertArrayEquals(inDimFilter1.getCacheKey(), inDimFilter2.getCacheKey());
+ }
+ }
+
+ @Test
public void testGetCacheKeyReturningSameKeyForSetsOfDifferentTypesAndComparators()
{
final Set<String> reverseOrderSet = new TreeSet<>(Ordering.natural().reversed());
@@ -120,6 +133,22 @@
}
@Test
+ public void testGetCacheKeyDifferentKeysForNullAndFourZeroChars()
+ {
+ final InDimFilter inDimFilter1 = new InDimFilter("dimTest", Arrays.asList(null, "abc"), null);
+ final InDimFilter inDimFilter2 = new InDimFilter("dimTest", Arrays.asList("\0\0\0\0", "abc"), null);
+ Assert.assertFalse(Arrays.equals(inDimFilter1.getCacheKey(), inDimFilter2.getCacheKey()));
+ }
+
+ @Test
+ public void testGetCacheKeyDifferentKeysWhenStringBoundariesMove()
+ {
+ final InDimFilter inDimFilter1 = new InDimFilter("dimTest", Arrays.asList("bar", "foo"), null);
+ final InDimFilter inDimFilter2 = new InDimFilter("dimTest", Arrays.asList("barf", "oo"), null);
+ Assert.assertFalse(Arrays.equals(inDimFilter1.getCacheKey(), inDimFilter2.getCacheKey()));
+ }
+
+ @Test
public void testGetCacheKeyDifferentKeysForListOfStringsAndSingleStringOfListsWithExtractFn()
{
RegexDimExtractionFn regexFn = new RegexDimExtractionFn(".*", false, null);