// blob: f2f1cf01d417c9f23ab18b7c9b8f04e444580d43 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.segment;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableMultiset;
import com.google.common.collect.Sets;
import org.apache.druid.collections.bitmap.ImmutableBitmap;
import org.apache.druid.common.config.NullHandling;
import org.apache.druid.data.input.MapBasedInputRow;
import org.apache.druid.java.util.common.ISE;
import org.apache.druid.java.util.common.guava.Comparators;
import org.apache.druid.query.aggregation.AggregatorFactory;
import org.apache.druid.segment.column.BitmapIndex;
import org.apache.druid.segment.column.ColumnHolder;
import org.apache.druid.segment.column.DictionaryEncodedColumn;
import org.apache.druid.segment.data.IncrementalIndexTest;
import org.apache.druid.segment.incremental.IncrementalIndex;
import org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMediumFactory;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;
import org.roaringbitmap.IntIterator;
import java.io.File;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
public class IndexMergerNullHandlingTest
{
// Merger used by the test to persist incremental indexes; assigned in setUp().
private IndexMerger indexMerger;
// Used by the test to load a persisted index back for inspection; assigned in setUp().
private IndexIO indexIO;
// Spec handed to indexMerger.persist(); setUp() assigns a default-constructed IndexSpec.
private IndexSpec indexSpec;
// JUnit rule; the test calls newFolder() on it to get a separate directory for each persisted index.
@Rule
public TemporaryFolder temporaryFolder = new TemporaryFolder();
/**
 * Initializes the merger, the index loader and the index spec shared by the tests in this class.
 * Runs before every test method.
 */
@Before
public void setUp()
{
  this.indexMerger = TestHelper.getTestIndexMergerV9(OffHeapMemorySegmentWriteOutMediumFactory.instance());
  this.indexIO = TestHelper.getTestIndexIO();
  this.indexSpec = new IndexSpec();
}
/**
 * Persists every non-empty subset of the eight input "flavors" of dimension "d" (two or four
 * non-null flavors and six or four null-like flavors, depending on
 * {@link NullHandling#replaceWithDefault()}) and checks the resulting column.
 *
 * For each subset:
 * <ul>
 * <li>if every row is a null flavor, the column must be absent from the persisted index;</li>
 * <li>otherwise the dictionary must hold exactly the expected unique values (nulls first), the
 * multi-value flag and cardinality must match, the set of distinct rows must match the normalized
 * input, and the bitmap index entry for null must cover exactly the rows that read back as null.</li>
 * </ul>
 *
 * Every assertion carries {@code subsetList.toString()} as its message so that a failure identifies
 * the offending input subset.
 */
@Test
public void testStringColumnNullHandling() throws Exception
{
  List<Map<String, Object>> nonNullFlavors = new ArrayList<>();
  nonNullFlavors.add(ImmutableMap.of("d", "a"));
  nonNullFlavors.add(ImmutableMap.of("d", ImmutableList.of("a", "b")));

  List<Map<String, Object>> nullFlavors = new ArrayList<>();
  Map<String, Object> mMissing = ImmutableMap.of();
  Map<String, Object> mEmptyList = ImmutableMap.of("d", Collections.emptyList());
  // ImmutableMap rejects null values, so the explicit-null flavor needs a HashMap.
  Map<String, Object> mNull = new HashMap<>();
  mNull.put("d", null);
  Map<String, Object> mEmptyString = ImmutableMap.of("d", "");
  Map<String, Object> mListOfNull = ImmutableMap.of("d", Collections.singletonList(null));
  Map<String, Object> mListOfEmptyString = ImmutableMap.of("d", Collections.singletonList(""));

  nullFlavors.add(mMissing);
  nullFlavors.add(mEmptyList);
  nullFlavors.add(mNull);
  nullFlavors.add(mListOfNull);

  // Empty strings count as null only in default-value mode; otherwise they are real values.
  if (NullHandling.replaceWithDefault()) {
    nullFlavors.add(mEmptyString);
    nullFlavors.add(mListOfEmptyString);
  } else {
    nonNullFlavors.add(mEmptyString);
    nonNullFlavors.add(mListOfEmptyString);
  }

  Set<Map<String, Object>> allValues = new HashSet<>();
  allValues.addAll(nonNullFlavors);
  allValues.addAll(nullFlavors);

  for (Set<Map<String, Object>> subset : Sets.powerSet(allValues)) {
    if (subset.isEmpty()) {
      continue;
    }

    final List<Map<String, Object>> subsetList = new ArrayList<>(subset);

    IncrementalIndex toPersist = IncrementalIndexTest.createIndex(new AggregatorFactory[]{});
    for (Map<String, Object> m : subsetList) {
      toPersist.add(new MapBasedInputRow(0L, ImmutableList.of("d"), m));
    }

    final File tempDir = temporaryFolder.newFolder();
    try (QueryableIndex index = indexIO.loadIndex(indexMerger.persist(toPersist, tempDir, indexSpec, null))) {
      final ColumnHolder columnHolder = index.getColumnHolder("d");

      if (nullFlavors.containsAll(subsetList)) {
        // all null -> should be missing
        Assert.assertNull(subsetList.toString(), columnHolder);
      } else {
        Assert.assertNotNull(subsetList.toString(), columnHolder);

        // The column has multiple values if there are any lists with > 1 element in the input set.
        final boolean hasMultipleValues = subsetList
            .stream()
            .anyMatch(m -> m.get("d") instanceof List && (((List<?>) m.get("d")).size() > 1));

        // Compute all unique values, the same way that IndexMerger is expected to do it.
        final Set<String> uniqueValues = new HashSet<>();
        for (Map<String, Object> m : subsetList) {
          final List<String> dValues = normalize(m.get("d"), hasMultipleValues);
          uniqueValues.addAll(dValues);

          // Any null-flavored row is expected to put null into the dictionary, even when its
          // normalized form is an empty list.
          if (nullFlavors.contains(m)) {
            uniqueValues.add(null);
          }
        }

        try (final DictionaryEncodedColumn<String> dictionaryColumn =
                 (DictionaryEncodedColumn<String>) columnHolder.getColumn()) {
          // Verify unique values against the dictionary.
          Assert.assertEquals(
              subsetList.toString(),
              uniqueValues.stream().sorted(Comparators.naturalNullsFirst()).collect(Collectors.toList()),
              IntStream.range(0, dictionaryColumn.getCardinality())
                       .mapToObj(dictionaryColumn::lookupName)
                       .collect(Collectors.toList())
          );

          Assert.assertEquals(subsetList.toString(), hasMultipleValues, dictionaryColumn.hasMultipleValues());
          Assert.assertEquals(subsetList.toString(), uniqueValues.size(), dictionaryColumn.getCardinality());

          // Verify the expected set of rows was indexed, ignoring order.
          Assert.assertEquals(
              subsetList.toString(),
              ImmutableMultiset.copyOf(
                  subsetList.stream()
                            .map(m -> normalize(m.get("d"), hasMultipleValues))
                            .distinct() // Distinct values only, because we expect rollup.
                            .collect(Collectors.toList())
              ),
              ImmutableMultiset.copyOf(
                  IntStream.range(0, index.getNumRows())
                           .mapToObj(rowNumber -> getRow(dictionaryColumn, rowNumber))
                           // The "distinct" shouldn't be necessary, but it is, because [{}, {d=}, {d=a}]
                           // yields [[null] x 2, [a]] (arguably a bug).
                           .distinct()
                           .collect(Collectors.toList())
              )
          );

          // Verify that the bitmap index for null is correct.
          final BitmapIndex bitmapIndex = columnHolder.getBitmapIndex();

          // Read through the column to find all the rows that should match null.
          final List<Integer> expectedNullRows = new ArrayList<>();
          for (int i = 0; i < index.getNumRows(); i++) {
            final List<String> row = getRow(dictionaryColumn, i);
            if (row.isEmpty() || row.stream().anyMatch(NullHandling::isNullOrEquivalent)) {
              expectedNullRows.add(i);
            }
          }

          final boolean expectNulls = !expectedNullRows.isEmpty();
          Assert.assertEquals(subsetList.toString(), expectNulls, bitmapIndex.hasNulls());

          if (expectNulls) {
            // Null sorts first, so it must occupy dictionary id 0.
            Assert.assertEquals(subsetList.toString(), 0, bitmapIndex.getIndex(null));

            final ImmutableBitmap nullBitmap = bitmapIndex.getBitmap(bitmapIndex.getIndex(null));
            final List<Integer> actualNullRows = new ArrayList<>();
            final IntIterator iterator = nullBitmap.iterator();
            while (iterator.hasNext()) {
              actualNullRows.add(iterator.next());
            }

            Assert.assertEquals(subsetList.toString(), expectedNullRows, actualNullRows);
          } else {
            // No null rows: null must not be in the dictionary at all.
            Assert.assertEquals(subsetList.toString(), -1, bitmapIndex.getIndex(null));
          }
        }
      }
    }
  }
}
/**
 * Converts a raw input value for dimension "d" into the list of strings the persisted column is
 * expected to report for that row, mirroring what IndexMerger is expected to do.
 *
 * @param value             raw input: null, a String, or a List of Strings
 * @param hasMultipleValues whether the resulting column is expected to be multi-valued
 *
 * @return a new list: {@code [null]} for a null input; a single element for a String; one element
 * per entry for a List, except that an empty List becomes {@code [null]} when the column is
 * single-valued. Strings are passed through {@link NullHandling#emptyToNullIfNeeded}.
 *
 * @throws ISE if the value is of any other type
 */
private static List<String> normalize(final Object value, final boolean hasMultipleValues)
{
  final List<String> normalized = new ArrayList<>();

  if (value == null) {
    normalized.add(null);
    return normalized;
  }

  if (value instanceof String) {
    normalized.add(NullHandling.emptyToNullIfNeeded(((String) value)));
    return normalized;
  }

  if (!(value instanceof List)) {
    throw new ISE("didn't expect class[%s]", value.getClass());
  }

  final List<String> elements = (List<String>) value;
  if (!hasMultipleValues && elements.isEmpty()) {
    // empty lists become nulls in single valued columns
    normalized.add(NullHandling.emptyToNullIfNeeded(null));
  } else {
    for (String element : elements) {
      normalized.add(NullHandling.emptyToNullIfNeeded(element));
    }
  }
  return normalized;
}
/**
 * Reads one row of a dictionary-encoded column and returns its values as strings, without any
 * normalization: each dictionary id stored in the row is resolved through
 * {@code column.lookupName}.
 *
 * @param column    column to read from
 * @param rowNumber row to read
 *
 * @return a new list holding the row's value(s); exactly one element for a single-valued column
 */
private static List<String> getRow(final DictionaryEncodedColumn<String> column, final int rowNumber)
{
  final List<String> values = new ArrayList<>();

  if (!column.hasMultipleValues()) {
    values.add(column.lookupName(column.getSingleValueRow(rowNumber)));
    return values;
  }

  column.getMultiValueRow(rowNumber).forEach(id -> values.add(column.lookupName(id)));
  return values;
}
}