blob: c14bd319bb13321df7c4285908b0f1b60911e984 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.segment;
import org.apache.druid.data.input.impl.DimensionSchema.MultiValueHandling;
import org.apache.druid.java.util.common.ISE;
import org.apache.druid.java.util.common.io.Closer;
import org.apache.druid.segment.column.ColumnCapabilities;
import org.apache.druid.segment.data.IndexedInts;
import org.apache.druid.segment.data.ZeroIndexedInts;
import org.apache.druid.segment.selector.settable.SettableColumnValueSelector;
import org.apache.druid.segment.selector.settable.SettableDimensionValueSelector;
import org.apache.druid.segment.writeout.SegmentWriteOutMedium;
import java.util.Comparator;
/**
 * {@link DimensionHandler} implementation backed by {@link StringDimensionIndexer} and {@link
 * StringDimensionMergerV9}. Besides creating those, it supplies the settable selector and the comparator that work
 * on encoded rows ({@link IndexedInts}).
 */
public class StringDimensionHandler implements DimensionHandler<Integer, int[], String>
{
  /**
   * Orders {@link IndexedInts} rows lexicographically, position by position. There is one exception: a row whose
   * every position holds zero (the index of null) counts as "null" as a whole and sorts before any "non-null" row.
   * A row of size zero counts as "null" too, so two "null" rows of different sizes compare as equal.
   *
   * The comparison is arranged so that every position of either row is read at most once.
   */
  private static final Comparator<ColumnValueSelector> DIMENSION_SELECTOR_COMPARATOR = (lhs, rhs) -> {
    final IndexedInts left = getRow(lhs);
    final IndexedInts right = getRow(rhs);
    final int leftSize = left.size();
    final int rightSize = right.size();
    final int sharedSize = Math.min(leftSize, rightSize);

    // While the loop keeps running, both rows hold identical values at every visited position, so one flag is
    // enough to remember whether the shared prefix consists of zeros only (for both rows at once).
    boolean sharedPrefixIsAllZeros = true;
    for (int pos = 0; pos < sharedSize; pos++) {
      final int leftValue = left.get(pos);
      final int rightValue = right.get(pos);
      if (leftValue != rightValue) {
        return Integer.compare(leftValue, rightValue);
      }
      sharedPrefixIsAllZeros &= leftValue == 0;
    }

    if (leftSize == rightSize) {
      // Same size and no differing position: the rows are identical.
      return 0;
    }
    if (!sharedPrefixIsAllZeros) {
      // A non-zero value was seen in the shared prefix, so neither row is "null": the shorter row goes first.
      //noinspection SubtractionInCompareTo -- subtraction is safe here, because lengths of rows are small numbers.
      return leftSize - rightSize;
    }
    // Everything seen so far is zero; the outcome depends on whether the tail of the longer row is all zeros too.
    return compareRestNulls(left, leftSize, right, rightSize);
  };

  /**
   * Finishes the comparison of two rows whose first {@code min(len1, len2)} positions are already known to be zeros.
   * Only the tail of the longer row is inspected: if it holds any non-zero value, the longer row is "non-null" and
   * therefore greater than the shorter, "null" row. If the tail is all zeros (or the lengths are equal), both rows
   * are "null" and compare as equal.
   *
   * @return -1 if row2 is the longer row and has a non-zero value in its tail, 1 if row1 is the longer row and has a
   * non-zero value in its tail, 0 otherwise
   */
  private static int compareRestNulls(IndexedInts row1, int len1, IndexedInts row2, int len2)
  {
    final boolean firstIsShorter = len1 < len2;
    final IndexedInts longerRow = firstIsShorter ? row2 : row1;
    final int tailStart = firstIsShorter ? len1 : len2;
    final int tailEnd = firstIsShorter ? len2 : len1;

    for (int pos = tailStart; pos < tailEnd; pos++) {
      if (longerRow.get(pos) != 0) {
        return firstIsShorter ? -1 : 1;
      }
    }
    return 0;
  }

  /**
   * Extracts the row to compare from the given selector. A {@link DimensionSelector} provides its current row; an
   * absent column, i. e. {@link NilColumnValueSelector}, is represented by {@link ZeroIndexedInts}, because the value
   * for an absent column should be equivalent to [null] during index merging.
   *
   * During index merging, if one of the merged indexes has absent columns, {@link StringDimensionMergerV9} ensures
   * that null value is present, and it has index = 0 after sorting, because sorting puts null first. See {@link
   * StringDimensionMergerV9#hasNull} and the place where it is assigned.
   *
   * @throws ISE if the selector is neither a {@link DimensionSelector} nor a {@link NilColumnValueSelector}
   */
  private static IndexedInts getRow(ColumnValueSelector s)
  {
    if (s instanceof DimensionSelector) {
      return ((DimensionSelector) s).getRow();
    }
    if (s instanceof NilColumnValueSelector) {
      return ZeroIndexedInts.instance();
    }
    throw new ISE(
        "ColumnValueSelector[%s], only DimensionSelector or NilColumnValueSelector is supported",
        s.getClass()
    );
  }

  private final String dimensionName;
  private final MultiValueHandling multiValueHandling;
  private final boolean hasBitmapIndexes;

  /**
   * @param dimensionName      name reported by {@link #getDimensionName()} and handed to the merger
   * @param multiValueHandling value reported by {@link #getMultivalueHandling()} and handed to the indexer
   * @param hasBitmapIndexes   flag handed to the indexer; {@link #makeMerger} requires the column capabilities to
   *                           agree with it
   */
  public StringDimensionHandler(String dimensionName, MultiValueHandling multiValueHandling, boolean hasBitmapIndexes)
  {
    this.dimensionName = dimensionName;
    this.multiValueHandling = multiValueHandling;
    this.hasBitmapIndexes = hasBitmapIndexes;
  }

  /**
   * Returns the dimension name this handler was constructed with.
   */
  @Override
  public String getDimensionName()
  {
    return dimensionName;
  }

  /**
   * Returns the {@link MultiValueHandling} this handler was constructed with.
   */
  @Override
  public MultiValueHandling getMultivalueHandling()
  {
    return multiValueHandling;
  }

  /**
   * Returns the number of elements in the given encoded key component.
   */
  @Override
  public int getLengthOfEncodedKeyComponent(int[] dimVals)
  {
    return dimVals.length;
  }

  /**
   * Returns the shared comparator described at {@link #DIMENSION_SELECTOR_COMPARATOR}. It only accepts
   * {@link DimensionSelector} and {@link NilColumnValueSelector} arguments and throws {@link ISE} for anything else.
   */
  @Override
  public Comparator<ColumnValueSelector> getEncodedValueSelectorComparator()
  {
    return DIMENSION_SELECTOR_COMPARATOR;
  }

  /**
   * Creates a fresh {@link SettableDimensionValueSelector}.
   */
  @Override
  public SettableColumnValueSelector makeNewSettableEncodedValueSelector()
  {
    return new SettableDimensionValueSelector();
  }

  /**
   * Creates a new {@link StringDimensionIndexer} configured with this handler's multi-value handling and bitmap
   * index flag.
   */
  @Override
  public DimensionIndexer<Integer, int[], String> makeIndexer()
  {
    return new StringDimensionIndexer(multiValueHandling, hasBitmapIndexes);
  }

  /**
   * Creates a new {@link StringDimensionMergerV9} for this dimension, passing all arguments through to it.
   *
   * @throws ISE if {@code capabilities.hasBitmapIndexes()} disagrees with the flag this handler was constructed with
   */
  @Override
  public DimensionMergerV9 makeMerger(
      IndexSpec indexSpec,
      SegmentWriteOutMedium segmentWriteOutMedium,
      ColumnCapabilities capabilities,
      ProgressIndicator progress,
      Closer closer
  )
  {
    // Sanity-check capabilities: a merger is only created when both sides agree about bitmap indexes.
    if (hasBitmapIndexes == capabilities.hasBitmapIndexes()) {
      return new StringDimensionMergerV9(
          dimensionName,
          indexSpec,
          segmentWriteOutMedium,
          capabilities,
          progress,
          closer
      );
    }
    throw new ISE(
        "capabilities.hasBitmapIndexes[%s] != this.hasBitmapIndexes[%s]",
        capabilities.hasBitmapIndexes(),
        hasBitmapIndexes
    );
  }
}