blob: 31ca53d1585bd48070bd3ba626e70ea722698a8a [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.segment;
import org.apache.druid.data.input.impl.DimensionSchema;
import org.apache.druid.data.input.impl.DimensionSchema.MultiValueHandling;
import org.apache.druid.data.input.impl.StringDimensionSchema;
import org.apache.druid.java.util.common.ISE;
import org.apache.druid.java.util.common.io.Closer;
import org.apache.druid.segment.column.ColumnCapabilities;
import org.apache.druid.segment.data.IndexedInts;
import org.apache.druid.segment.data.ZeroIndexedInts;
import org.apache.druid.segment.selector.settable.SettableColumnValueSelector;
import org.apache.druid.segment.selector.settable.SettableDimensionValueSelector;
import org.apache.druid.segment.writeout.SegmentWriteOutMedium;
import java.util.Comparator;
/**
 * {@link DimensionHandler} for string dimensions. Encoded values are {@code Integer}s, an encoded key component
 * (one row's worth of values for the dimension) is an {@code int[]}, and the actual values are {@code String}s.
 */
public class StringDimensionHandler implements DimensionHandler<Integer, int[], String>
{
  /**
   * Compares the encoded rows exposed by two selectors. The rules are:
   * - [] and [null] are considered equal. Null is expected to be encoded as 0 here, see {@link #getRow}.
   *   More precisely, when one row is empty and the other holds exactly one value, the rows are equal if that value
   *   is 0, otherwise the non-empty row is the greater one.
   * - Otherwise, compare the two value arrays element by element up to the length of the shorter array.
   * - If the two arrays match so far, then compare the array lengths, the shorter array is considered smaller.
   */
  private static final Comparator<ColumnValueSelector> DIMENSION_SELECTOR_COMPARATOR = (s1, s2) -> {
    IndexedInts row1 = getRow(s1);
    IndexedInts row2 = getRow(s2);
    int len1 = row1.size();
    int len2 = row2.size();
    int lenCompareResult = Integer.compare(len1, len2);

    // If the lengths differ and add up to 1, one row is empty and the other has a single value: check whether we
    // are comparing [] and [null], which are equivalent.
    if (lenCompareResult != 0 && len1 + len2 == 1) {
      IndexedInts longerRow = len2 > len1 ? row2 : row1;
      if (longerRow.get(0) == 0) {
        return 0;
      } else {
        //noinspection ObjectEquality -- longerRow is explicitly set to only row1 or row2
        return longerRow == row1 ? 1 : -1;
      }
    }

    int lenToCompare = Math.min(len1, len2);
    for (int valsIndex = 0; valsIndex < lenToCompare; valsIndex++) {
      int valueCompareResult = Integer.compare(row1.get(valsIndex), row2.get(valsIndex));
      if (valueCompareResult != 0) {
        return valueCompareResult;
      }
    }
    // The common prefix is identical, so the shorter row sorts first.
    return lenCompareResult;
  };

  /**
   * Returns the encoded row of the given selector.
   * <p>
   * Value for absent column, i. e. {@link NilColumnValueSelector}, should be equivalent to [null] during index merging.
   * <p>
   * During index merging, if one of the merged indexes has absent columns, {@link StringDimensionMergerV9} ensures
   * that null value is present, and it has index = 0 after sorting, because sorting puts null first. See {@link
   * StringDimensionMergerV9#hasNull} and the place where it is assigned.
   *
   * @throws ISE if the selector is neither a {@link DimensionSelector} nor a {@link NilColumnValueSelector}
   */
  private static IndexedInts getRow(ColumnValueSelector s)
  {
    if (s instanceof DimensionSelector) {
      return ((DimensionSelector) s).getRow();
    } else if (s instanceof NilColumnValueSelector) {
      return ZeroIndexedInts.instance();
    } else {
      throw new ISE(
          "ColumnValueSelector[%s], only DimensionSelector or NilColumnValueSelector is supported",
          s.getClass()
      );
    }
  }

  private final String dimensionName;
  private final MultiValueHandling multiValueHandling;
  private final boolean hasBitmapIndexes;
  private final boolean hasSpatialIndexes;

  /**
   * @param dimensionName      name of the dimension this handler is responsible for
   * @param multiValueHandling multi-value handling mode, passed on to the indexer and to the dimension schema
   * @param hasBitmapIndexes   whether the dimension has bitmap indexes; checked against the column capabilities in
   *                           {@link #makeMerger}
   * @param hasSpatialIndexes  whether the dimension has spatial indexes; passed on to the indexer
   */
  public StringDimensionHandler(
      String dimensionName,
      MultiValueHandling multiValueHandling,
      boolean hasBitmapIndexes,
      boolean hasSpatialIndexes
  )
  {
    this.dimensionName = dimensionName;
    this.multiValueHandling = multiValueHandling;
    this.hasBitmapIndexes = hasBitmapIndexes;
    this.hasSpatialIndexes = hasSpatialIndexes;
  }

  @Override
  public String getDimensionName()
  {
    return dimensionName;
  }

  /**
   * Returns a string dimension schema describing this handler's dimension.
   * <p>
   * The schema carries this handler's multi-value handling and bitmap index setting, so that a schema derived from
   * the handler does not silently fall back to whatever defaults the name-only schema constructor applies.
   *
   * @param capabilities not used by this implementation
   */
  @Override
  public DimensionSchema getDimensionSchema(ColumnCapabilities capabilities)
  {
    return new StringDimensionSchema(dimensionName, multiValueHandling, hasBitmapIndexes);
  }

  @Override
  public MultiValueHandling getMultivalueHandling()
  {
    return multiValueHandling;
  }

  /**
   * Returns the number of encoded values in the given key component, i. e. the array length.
   */
  @Override
  public int getLengthOfEncodedKeyComponent(int[] dimVals)
  {
    return dimVals.length;
  }

  /**
   * Returns the shared, stateless comparator described on {@link #DIMENSION_SELECTOR_COMPARATOR}.
   */
  @Override
  public Comparator<ColumnValueSelector> getEncodedValueSelectorComparator()
  {
    return DIMENSION_SELECTOR_COMPARATOR;
  }

  /**
   * Returns a new {@link SettableDimensionValueSelector} on every call.
   */
  @Override
  public SettableColumnValueSelector makeNewSettableEncodedValueSelector()
  {
    return new SettableDimensionValueSelector();
  }

  /**
   * Creates a new {@link StringDimensionIndexer} configured with this handler's multi-value handling, bitmap index
   * and spatial index settings.
   */
  @Override
  public DimensionIndexer<Integer, int[], String> makeIndexer(boolean useMaxMemoryEstimates)
  {
    return new StringDimensionIndexer(multiValueHandling, hasBitmapIndexes, hasSpatialIndexes, useMaxMemoryEstimates);
  }

  /**
   * Creates a new {@link StringDimensionMergerV9} for this handler's dimension.
   *
   * @throws ISE if {@code capabilities.hasBitmapIndexes()} does not agree with this handler's bitmap index setting
   */
  @Override
  public DimensionMergerV9 makeMerger(
      IndexSpec indexSpec,
      SegmentWriteOutMedium segmentWriteOutMedium,
      ColumnCapabilities capabilities,
      ProgressIndicator progress,
      Closer closer
  )
  {
    // Sanity-check capabilities.
    if (hasBitmapIndexes != capabilities.hasBitmapIndexes()) {
      throw new ISE(
          "capabilities.hasBitmapIndexes[%s] != this.hasBitmapIndexes[%s]",
          capabilities.hasBitmapIndexes(),
          hasBitmapIndexes
      );
    }

    return new StringDimensionMergerV9(
        dimensionName,
        indexSpec,
        segmentWriteOutMedium,
        capabilities,
        progress,
        closer
    );
  }
}