blob: 5b56476fb5eccf533fc734d63eef7aa86b130720 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.segment.column;
import com.google.common.base.Predicate;
import com.google.common.base.Predicates;
import org.apache.druid.java.util.common.guava.CloseQuietly;
import org.apache.druid.query.extraction.ExtractionFn;
import org.apache.druid.query.filter.ValueMatcher;
import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector;
import org.apache.druid.segment.AbstractDimensionSelector;
import org.apache.druid.segment.DimensionSelectorUtils;
import org.apache.druid.segment.IdLookup;
import org.apache.druid.segment.data.CachingIndexed;
import org.apache.druid.segment.data.ColumnarInts;
import org.apache.druid.segment.data.ColumnarMultiInts;
import org.apache.druid.segment.data.Indexed;
import org.apache.druid.segment.data.IndexedInts;
import org.apache.druid.segment.data.ReadableOffset;
import org.apache.druid.segment.data.SingleIndexedInt;
import org.apache.druid.segment.filter.BooleanValueMatcher;
import org.apache.druid.segment.historical.HistoricalDimensionSelector;
import org.apache.druid.segment.historical.SingleValueHistoricalDimensionSelector;
import org.apache.druid.segment.vector.MultiValueDimensionVectorSelector;
import org.apache.druid.segment.vector.ReadableVectorInspector;
import org.apache.druid.segment.vector.ReadableVectorOffset;
import org.apache.druid.segment.vector.SingleValueDimensionVectorSelector;
import org.apache.druid.segment.vector.VectorObjectSelector;
import javax.annotation.Nullable;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.BitSet;
/**
 * {@link DictionaryEncodedColumn} for String values. Rows are stored as integer dictionary ids, held either in a
 * single-value {@link ColumnarInts} or in a multi-value {@link ColumnarMultiInts}. Ids are translated to and from
 * values through a dictionary that is available both as Strings and as UTF-8 encoded buffers.
 *
 * The column reports itself as multi-value whenever no single-value column was supplied at construction time.
 */
public class StringDictionaryEncodedColumn implements DictionaryEncodedColumn<String>
{
  @Nullable
  private final ColumnarInts column;
  @Nullable
  private final ColumnarMultiInts multiValueColumn;
  private final CachingIndexed<String> cachedDictionary;
  private final Indexed<ByteBuffer> dictionaryUtf8;

  /**
   * @param singleValueColumn dictionary ids for a single-value column; when null the column is treated as multi-value
   * @param multiValueColumn  dictionary ids for a multi-value column; read only when {@code singleValueColumn} is null
   * @param dictionary        dictionary used for id to String and String to id lookups
   * @param dictionaryUtf8    dictionary used for id to UTF-8 buffer lookups
   */
  public StringDictionaryEncodedColumn(
      @Nullable ColumnarInts singleValueColumn,
      @Nullable ColumnarMultiInts multiValueColumn,
      CachingIndexed<String> dictionary,
      Indexed<ByteBuffer> dictionaryUtf8
  )
  {
    this.column = singleValueColumn;
    this.multiValueColumn = multiValueColumn;
    this.cachedDictionary = dictionary;
    this.dictionaryUtf8 = dictionaryUtf8;
  }

  /**
   * Returns the size of whichever underlying id column (single-value or multi-value) backs this column.
   */
  @Override
  public int length()
  {
    return hasMultipleValues() ? multiValueColumn.size() : column.size();
  }

  /**
   * Returns true when no single-value column was supplied, in which case reads go to the multi-value column.
   */
  @Override
  public boolean hasMultipleValues()
  {
    return column == null;
  }

  /**
   * Returns the dictionary id stored at {@code rowNum} in the single-value column. Must only be called when
   * {@link #hasMultipleValues()} is false, since the single-value column is null otherwise.
   */
  @Override
  public int getSingleValueRow(int rowNum)
  {
    return column.get(rowNum);
  }

  /**
   * Returns the dictionary ids stored at {@code rowNum} in the multi-value column.
   */
  @Override
  public IndexedInts getMultiValueRow(int rowNum)
  {
    return multiValueColumn.get(rowNum);
  }

  /**
   * Returns the String dictionary value for the given dictionary id.
   */
  @Override
  @Nullable
  public String lookupName(int id)
  {
    return cachedDictionary.get(id);
  }

  /**
   * Returns the value for a particular dictionary id as UTF-8 bytes.
   *
   * The returned buffer is in big-endian order. It is not reused, so callers may modify the position, limit, byte
   * order, etc of the buffer.
   *
   * The returned buffer points to the original data, so callers must take care not to use it outside the valid
   * lifetime of this column.
   *
   * @param id id to lookup the dictionary value for
   *
   * @return dictionary value for the given id, or null if the value is itself null
   */
  @Nullable
  public ByteBuffer lookupNameUtf8(int id)
  {
    return dictionaryUtf8.get(id);
  }

  /**
   * Returns the position of {@code name} in the dictionary, as reported by the dictionary's {@code indexOf}. The
   * value matchers built by this class treat a negative result as "value not present".
   */
  @Override
  public int lookupId(String name)
  {
    return cachedDictionary.indexOf(name);
  }

  /**
   * Returns the number of entries in the dictionary.
   */
  @Override
  public int getCardinality()
  {
    return cachedDictionary.size();
  }

  /**
   * Creates a row-at-a-time selector positioned by {@code offset}. A multi-value selector is returned when
   * {@link #hasMultipleValues()} is true, otherwise a single-value selector.
   *
   * @param offset       supplies the current row position for the no-argument row accessors
   * @param extractionFn optional function applied to every looked-up name; when present, reverse (name to id)
   *                     lookups are unavailable on the returned selector
   */
  @Override
  public HistoricalDimensionSelector makeDimensionSelector(
      final ReadableOffset offset,
      @Nullable final ExtractionFn extractionFn
  )
  {
    // Dictionary-related behavior shared by the single-value and multi-value selectors below.
    abstract class QueryableDimensionSelector extends AbstractDimensionSelector
        implements HistoricalDimensionSelector, IdLookup
    {
      @Override
      public int getValueCardinality()
      {
        /*
         This is technically wrong if
         extractionFn != null && (extractionFn.getExtractionType() != ExtractionFn.ExtractionType.ONE_TO_ONE ||
                                    !extractionFn.preservesOrdering())
         However current behavior allows some GroupBy-V1 queries to work that wouldn't work otherwise and doesn't
         cause any problems due to special handling of extractionFn everywhere.
         See https://github.com/apache/druid/pull/8433
         */
        return getCardinality();
      }

      @Override
      public String lookupName(int id)
      {
        final String value = StringDictionaryEncodedColumn.this.lookupName(id);
        return extractionFn == null ? value : extractionFn.apply(value);
      }

      // Note: returns the raw dictionary bytes; the extractionFn is not applied on this path.
      @Nullable
      @Override
      public ByteBuffer lookupNameUtf8(int id)
      {
        return StringDictionaryEncodedColumn.this.lookupNameUtf8(id);
      }

      @Override
      public boolean supportsLookupNameUtf8()
      {
        return true;
      }

      @Override
      public boolean nameLookupPossibleInAdvance()
      {
        return true;
      }

      @Nullable
      @Override
      public IdLookup idLookup()
      {
        // Reverse lookups are only offered when names are not transformed by an extractionFn.
        return extractionFn == null ? this : null;
      }

      @Override
      public int lookupId(String name)
      {
        if (extractionFn != null) {
          throw new UnsupportedOperationException("cannot perform lookup when applying an extraction function");
        }
        return StringDictionaryEncodedColumn.this.lookupId(name);
      }
    }

    if (hasMultipleValues()) {
      class MultiValueDimensionSelector extends QueryableDimensionSelector
      {
        @Override
        public IndexedInts getRow()
        {
          return multiValueColumn.get(offset.getOffset());
        }

        @Override
        public IndexedInts getRow(int offset)
        {
          return multiValueColumn.get(offset);
        }

        @Override
        public ValueMatcher makeValueMatcher(@Nullable String value)
        {
          return DimensionSelectorUtils.makeValueMatcherGeneric(this, value);
        }

        @Override
        public ValueMatcher makeValueMatcher(Predicate<String> predicate)
        {
          return DimensionSelectorUtils.makeValueMatcherGeneric(this, predicate);
        }

        @Nullable
        @Override
        public Object getObject()
        {
          return defaultGetObject();
        }

        @Override
        public Class classOfObject()
        {
          return Object.class;
        }

        @Override
        public void inspectRuntimeShape(RuntimeShapeInspector inspector)
        {
          inspector.visit("multiValueColumn", multiValueColumn);
          inspector.visit("offset", offset);
          inspector.visit("extractionFn", extractionFn);
        }
      }
      return new MultiValueDimensionSelector();
    } else {
      class SingleValueQueryableDimensionSelector extends QueryableDimensionSelector
          implements SingleValueHistoricalDimensionSelector
      {
        // Reused holder: every getRow call overwrites its value and returns this same instance.
        private final SingleIndexedInt row = new SingleIndexedInt();

        @Override
        public IndexedInts getRow()
        {
          row.setValue(getRowValue());
          return row;
        }

        public int getRowValue()
        {
          return column.get(offset.getOffset());
        }

        @Override
        public IndexedInts getRow(int offset)
        {
          row.setValue(getRowValue(offset));
          return row;
        }

        @Override
        public int getRowValue(int offset)
        {
          return column.get(offset);
        }

        @Override
        public ValueMatcher makeValueMatcher(final @Nullable String value)
        {
          if (extractionFn == null) {
            // Without an extractionFn the value can be resolved to a dictionary id once, up front.
            final int valueId = lookupId(value);
            if (valueId >= 0) {
              return new ValueMatcher()
              {
                @Override
                public boolean matches()
                {
                  return getRowValue() == valueId;
                }

                @Override
                public void inspectRuntimeShape(RuntimeShapeInspector inspector)
                {
                  inspector.visit("column", StringDictionaryEncodedColumn.this);
                }
              };
            } else {
              // Negative id: the value is not in the dictionary, so no row can match.
              return BooleanValueMatcher.of(false);
            }
          } else {
            // Employ caching BitSet optimization
            return makeValueMatcher(Predicates.equalTo(value));
          }
        }

        @Override
        public ValueMatcher makeValueMatcher(final Predicate<String> predicate)
        {
          // checkedIds marks ids the predicate has already been evaluated for; matchingIds marks those that matched.
          final BitSet checkedIds = new BitSet(getCardinality());
          final BitSet matchingIds = new BitSet(getCardinality());

          // Lazy matcher; only check an id if matches() is called.
          return new ValueMatcher()
          {
            @Override
            public boolean matches()
            {
              final int id = getRowValue();

              if (checkedIds.get(id)) {
                return matchingIds.get(id);
              } else {
                final boolean matches = predicate.apply(lookupName(id));
                checkedIds.set(id);
                if (matches) {
                  matchingIds.set(id);
                }
                return matches;
              }
            }

            @Override
            public void inspectRuntimeShape(RuntimeShapeInspector inspector)
            {
              inspector.visit("column", StringDictionaryEncodedColumn.this);
            }
          };
        }

        @Override
        public Object getObject()
        {
          return lookupName(getRowValue());
        }

        @Override
        public Class classOfObject()
        {
          return String.class;
        }

        @Override
        public void inspectRuntimeShape(RuntimeShapeInspector inspector)
        {
          inspector.visit("column", column);
          inspector.visit("offset", offset);
          inspector.visit("extractionFn", extractionFn);
        }
      }
      return new SingleValueQueryableDimensionSelector();
    }
  }

  /**
   * Creates a vectorized selector over the single-value column. The returned selector reads dictionary ids for the
   * rows currently covered by {@code offset} into an array that it reuses across calls.
   */
  @Override
  public SingleValueDimensionVectorSelector makeSingleValueDimensionVectorSelector(final ReadableVectorOffset offset)
  {
    class QueryableSingleValueDimensionVectorSelector implements SingleValueDimensionVectorSelector, IdLookup
    {
      private final int[] vector = new int[offset.getMaxVectorSize()];
      // Id of the offset state that "vector" was last filled for.
      private int id = ReadableVectorInspector.NULL_ID;

      @Override
      public int[] getRowVector()
      {
        // Already populated for the current offset id; return the same array as-is.
        if (id == offset.getId()) {
          return vector;
        }

        if (offset.isContiguous()) {
          column.get(vector, offset.getStartOffset(), offset.getCurrentVectorSize());
        } else {
          column.get(vector, offset.getOffsets(), offset.getCurrentVectorSize());
        }

        id = offset.getId();
        return vector;
      }

      @Override
      public int getValueCardinality()
      {
        return getCardinality();
      }

      @Nullable
      @Override
      public String lookupName(final int id)
      {
        return StringDictionaryEncodedColumn.this.lookupName(id);
      }

      @Nullable
      @Override
      public ByteBuffer lookupNameUtf8(int id)
      {
        return StringDictionaryEncodedColumn.this.lookupNameUtf8(id);
      }

      @Override
      public boolean supportsLookupNameUtf8()
      {
        return true;
      }

      @Override
      public boolean nameLookupPossibleInAdvance()
      {
        return true;
      }

      @Nullable
      @Override
      public IdLookup idLookup()
      {
        return this;
      }

      @Override
      public int lookupId(@Nullable final String name)
      {
        return StringDictionaryEncodedColumn.this.lookupId(name);
      }

      @Override
      public int getCurrentVectorSize()
      {
        return offset.getCurrentVectorSize();
      }

      @Override
      public int getMaxVectorSize()
      {
        return offset.getMaxVectorSize();
      }
    }

    return new QueryableSingleValueDimensionVectorSelector();
  }

  /**
   * Creates a vectorized selector over the multi-value column. The returned selector fills a reused array with one
   * {@link IndexedInts} per row currently covered by {@code offset}.
   */
  @Override
  public MultiValueDimensionVectorSelector makeMultiValueDimensionVectorSelector(final ReadableVectorOffset offset)
  {
    class QueryableMultiValueDimensionVectorSelector implements MultiValueDimensionVectorSelector, IdLookup
    {
      private final IndexedInts[] vector = new IndexedInts[offset.getMaxVectorSize()];
      // Id of the offset state that "vector" was last filled for.
      private int id = ReadableVectorInspector.NULL_ID;

      @Override
      public IndexedInts[] getRowVector()
      {
        // Already populated for the current offset id; return the same array as-is.
        if (id == offset.getId()) {
          return vector;
        }

        final int numRows = offset.getCurrentVectorSize();

        if (offset.isContiguous()) {
          final int currentOffset = offset.getStartOffset();

          for (int i = 0; i < numRows; i++) {
            // Must use getUnshared, otherwise all elements in the vector could be the same shared object.
            vector[i] = multiValueColumn.getUnshared(i + currentOffset);
          }
        } else {
          final int[] offsets = offset.getOffsets();

          for (int i = 0; i < numRows; i++) {
            // Must use getUnshared, otherwise all elements in the vector could be the same shared object.
            vector[i] = multiValueColumn.getUnshared(offsets[i]);
          }
        }

        id = offset.getId();
        return vector;
      }

      @Override
      public int getValueCardinality()
      {
        return getCardinality();
      }

      @Nullable
      @Override
      public String lookupName(final int id)
      {
        return StringDictionaryEncodedColumn.this.lookupName(id);
      }

      @Nullable
      @Override
      public ByteBuffer lookupNameUtf8(int id)
      {
        return StringDictionaryEncodedColumn.this.lookupNameUtf8(id);
      }

      @Override
      public boolean supportsLookupNameUtf8()
      {
        return true;
      }

      @Override
      public boolean nameLookupPossibleInAdvance()
      {
        return true;
      }

      @Nullable
      @Override
      public IdLookup idLookup()
      {
        return this;
      }

      @Override
      public int lookupId(@Nullable final String name)
      {
        return StringDictionaryEncodedColumn.this.lookupId(name);
      }

      @Override
      public int getCurrentVectorSize()
      {
        return offset.getCurrentVectorSize();
      }

      @Override
      public int getMaxVectorSize()
      {
        return offset.getMaxVectorSize();
      }
    }

    return new QueryableMultiValueDimensionVectorSelector();
  }

  /**
   * Creates a vectorized selector that yields the String value of each row currently covered by {@code offset}.
   *
   * @throws UnsupportedOperationException if this column has multiple values per row
   */
  @Override
  public VectorObjectSelector makeVectorObjectSelector(ReadableVectorOffset offset)
  {
    if (!hasMultipleValues()) {
      class DictionaryEncodedStringSingleValueVectorObjectSelector implements VectorObjectSelector
      {
        // Scratch space for dictionary ids, and the looked-up values handed back to callers; both are reused.
        private final int[] vector = new int[offset.getMaxVectorSize()];
        private final String[] strings = new String[offset.getMaxVectorSize()];
        // Id of the offset state that "strings" was last filled for.
        private int id = ReadableVectorInspector.NULL_ID;

        @Override
        public Object[] getObjectVector()
        {
          // Already populated for the current offset id; return the same array as-is.
          if (id == offset.getId()) {
            return strings;
          }

          // Read once: the same size bounds both the id read and the lookup loop.
          final int currentVectorSize = offset.getCurrentVectorSize();

          if (offset.isContiguous()) {
            column.get(vector, offset.getStartOffset(), currentVectorSize);
          } else {
            column.get(vector, offset.getOffsets(), currentVectorSize);
          }
          for (int i = 0; i < currentVectorSize; i++) {
            strings[i] = lookupName(vector[i]);
          }
          id = offset.getId();

          return strings;
        }

        @Override
        public int getMaxVectorSize()
        {
          return offset.getMaxVectorSize();
        }

        @Override
        public int getCurrentVectorSize()
        {
          return offset.getCurrentVectorSize();
        }
      }

      return new DictionaryEncodedStringSingleValueVectorObjectSelector();
    } else {
      throw new UnsupportedOperationException("Multivalue string object selector not implemented yet");
    }
  }

  /**
   * Closes the dictionary and whichever underlying id columns are present.
   *
   * The dictionary is closed through {@link CloseQuietly}. The multi-value column is closed even if closing the
   * single-value column throws, so a failure in one does not leak the other.
   *
   * @throws IOException if closing either id column fails
   */
  @Override
  public void close() throws IOException
  {
    CloseQuietly.close(cachedDictionary);
    try {
      if (column != null) {
        column.close();
      }
    }
    finally {
      if (multiValueColumn != null) {
        multiValueColumn.close();
      }
    }
  }
}