blob: 28bc32656bfed30c91c8e6534efb3f0b29d8d64b [file] [log] [blame]
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.pinot.core.operator.filter.predicate;
import it.unimi.dsi.fastutil.doubles.DoubleOpenHashSet;
import it.unimi.dsi.fastutil.doubles.DoubleSet;
import it.unimi.dsi.fastutil.floats.FloatOpenHashSet;
import it.unimi.dsi.fastutil.floats.FloatSet;
import it.unimi.dsi.fastutil.ints.IntOpenHashSet;
import it.unimi.dsi.fastutil.ints.IntSet;
import it.unimi.dsi.fastutil.longs.LongOpenHashSet;
import it.unimi.dsi.fastutil.longs.LongSet;
import it.unimi.dsi.fastutil.objects.ObjectOpenHashSet;
import java.math.BigDecimal;
import java.util.Arrays;
import java.util.List;
import java.util.Set;
import java.util.TreeSet;
import org.apache.pinot.common.request.context.predicate.InPredicate;
import org.apache.pinot.common.utils.HashUtil;
import org.apache.pinot.segment.spi.index.reader.Dictionary;
import org.apache.pinot.spi.data.FieldSpec.DataType;
import org.apache.pinot.spi.utils.ByteArray;
/**
* Factory for IN predicate evaluators.
*/
public class InPredicateEvaluatorFactory {
private InPredicateEvaluatorFactory() {
}
/**
* Create a new instance of dictionary based IN predicate evaluator.
*
* @param inPredicate IN predicate to evaluate
* @param dictionary Dictionary for the column
* @param dataType Data type for the column
* @return Dictionary based IN predicate evaluator
*/
public static BaseDictionaryBasedPredicateEvaluator newDictionaryBasedEvaluator(InPredicate inPredicate,
Dictionary dictionary, DataType dataType) {
return new DictionaryBasedInPredicateEvaluator(inPredicate, dictionary, dataType);
}
/**
* Create a new instance of raw value based IN predicate evaluator.
*
* @param inPredicate IN predicate to evaluate
* @param dataType Data type for the column
* @return Raw value based IN predicate evaluator
*/
public static BaseRawValueBasedPredicateEvaluator newRawValueBasedEvaluator(InPredicate inPredicate,
DataType dataType) {
switch (dataType) {
case INT: {
int[] intValues = inPredicate.getIntValues();
IntSet matchingValues = new IntOpenHashSet(HashUtil.getMinHashSetSize(intValues.length));
for (int value : intValues) {
matchingValues.add(value);
}
return new IntRawValueBasedInPredicateEvaluator(inPredicate, matchingValues);
}
case LONG: {
long[] longValues = inPredicate.getLongValues();
LongSet matchingValues = new LongOpenHashSet(HashUtil.getMinHashSetSize(longValues.length));
for (long value : longValues) {
matchingValues.add(value);
}
return new LongRawValueBasedInPredicateEvaluator(inPredicate, matchingValues);
}
case FLOAT: {
float[] floatValues = inPredicate.getFloatValues();
FloatSet matchingValues = new FloatOpenHashSet(HashUtil.getMinHashSetSize(floatValues.length));
for (float value : floatValues) {
matchingValues.add(value);
}
return new FloatRawValueBasedInPredicateEvaluator(inPredicate, matchingValues);
}
case DOUBLE: {
double[] doubleValues = inPredicate.getDoubleValues();
DoubleSet matchingValues = new DoubleOpenHashSet(HashUtil.getMinHashSetSize(doubleValues.length));
for (double value : doubleValues) {
matchingValues.add(value);
}
return new DoubleRawValueBasedInPredicateEvaluator(inPredicate, matchingValues);
}
case BIG_DECIMAL: {
BigDecimal[] bigDecimalValues = inPredicate.getBigDecimalValues();
// NOTE: Use TreeSet because BigDecimal's compareTo() is not consistent with equals()
// E.g. compareTo(3.0, 3) returns 0 but equals(3.0, 3) returns false
TreeSet<BigDecimal> matchingValues = new TreeSet<>(Arrays.asList(bigDecimalValues));
return new BigDecimalRawValueBasedInPredicateEvaluator(inPredicate, matchingValues);
}
case BOOLEAN: {
int[] booleanValues = inPredicate.getBooleanValues();
IntSet matchingValues = new IntOpenHashSet(HashUtil.getMinHashSetSize(booleanValues.length));
for (int value : booleanValues) {
matchingValues.add(value);
}
return new IntRawValueBasedInPredicateEvaluator(inPredicate, matchingValues);
}
case TIMESTAMP: {
long[] timestampValues = inPredicate.getTimestampValues();
LongSet matchingValues = new LongOpenHashSet(HashUtil.getMinHashSetSize(timestampValues.length));
for (long value : timestampValues) {
matchingValues.add(value);
}
return new LongRawValueBasedInPredicateEvaluator(inPredicate, matchingValues);
}
case STRING: {
List<String> stringValues = inPredicate.getValues();
Set<String> matchingValues = new ObjectOpenHashSet<>(HashUtil.getMinHashSetSize(stringValues.size()));
// NOTE: Add value-by-value to avoid overhead
for (String value : stringValues) {
//noinspection UseBulkOperation
matchingValues.add(value);
}
return new StringRawValueBasedInPredicateEvaluator(inPredicate, matchingValues);
}
case BYTES: {
ByteArray[] bytesValues = inPredicate.getBytesValues();
Set<ByteArray> matchingValues = new ObjectOpenHashSet<>(HashUtil.getMinHashSetSize(bytesValues.length));
// NOTE: Add value-by-value to avoid overhead
//noinspection ManualArrayToCollectionCopy
for (ByteArray value : bytesValues) {
//noinspection UseBulkOperation
matchingValues.add(value);
}
return new BytesRawValueBasedInPredicateEvaluator(inPredicate, matchingValues);
}
default:
throw new IllegalStateException("Unsupported data type: " + dataType);
}
}
private static final class DictionaryBasedInPredicateEvaluator extends BaseDictionaryBasedPredicateEvaluator {
final IntSet _matchingDictIdSet;
final int _numMatchingDictIds;
int[] _matchingDictIds;
DictionaryBasedInPredicateEvaluator(InPredicate inPredicate, Dictionary dictionary, DataType dataType) {
super(inPredicate);
_matchingDictIdSet = PredicateUtils.getDictIdSet(inPredicate, dictionary, dataType);
_numMatchingDictIds = _matchingDictIdSet.size();
if (_numMatchingDictIds == 0) {
_alwaysFalse = true;
} else if (dictionary.length() == _numMatchingDictIds) {
_alwaysTrue = true;
}
}
@Override
public boolean applySV(int dictId) {
return _matchingDictIdSet.contains(dictId);
}
@Override
public int getNumMatchingDictIds() {
return _numMatchingDictIds;
}
@Override
public int[] getMatchingDictIds() {
if (_matchingDictIds == null) {
_matchingDictIds = _matchingDictIdSet.toIntArray();
}
return _matchingDictIds;
}
@Override
public int applySV(int limit, int[] docIds, int[] values) {
// reimplemented here to ensure applySV can be inlined
int matches = 0;
for (int i = 0; i < limit; i++) {
int value = values[i];
if (applySV(value)) {
docIds[matches++] = docIds[i];
}
}
return matches;
}
}
private static final class IntRawValueBasedInPredicateEvaluator extends BaseRawValueBasedPredicateEvaluator {
final IntSet _matchingValues;
IntRawValueBasedInPredicateEvaluator(InPredicate inPredicate, IntSet matchingValues) {
super(inPredicate);
_matchingValues = matchingValues;
}
@Override
public DataType getDataType() {
return DataType.INT;
}
@Override
public boolean applySV(int value) {
return _matchingValues.contains(value);
}
@Override
public int applySV(int limit, int[] docIds, int[] values) {
// reimplemented here to ensure applySV can be inlined
int matches = 0;
for (int i = 0; i < limit; i++) {
int value = values[i];
if (applySV(value)) {
docIds[matches++] = docIds[i];
}
}
return matches;
}
}
private static final class LongRawValueBasedInPredicateEvaluator extends BaseRawValueBasedPredicateEvaluator {
final LongSet _matchingValues;
LongRawValueBasedInPredicateEvaluator(InPredicate inPredicate, LongSet matchingValues) {
super(inPredicate);
_matchingValues = matchingValues;
}
@Override
public DataType getDataType() {
return DataType.LONG;
}
@Override
public boolean applySV(long value) {
return _matchingValues.contains(value);
}
@Override
public int applySV(int limit, int[] docIds, long[] values) {
// reimplemented here to ensure applySV can be inlined
int matches = 0;
for (int i = 0; i < limit; i++) {
long value = values[i];
if (applySV(value)) {
docIds[matches++] = docIds[i];
}
}
return matches;
}
}
private static final class FloatRawValueBasedInPredicateEvaluator extends BaseRawValueBasedPredicateEvaluator {
final FloatSet _matchingValues;
FloatRawValueBasedInPredicateEvaluator(InPredicate inPredicate, FloatSet matchingValues) {
super(inPredicate);
_matchingValues = matchingValues;
}
@Override
public DataType getDataType() {
return DataType.FLOAT;
}
@Override
public boolean applySV(float value) {
return _matchingValues.contains(value);
}
@Override
public int applySV(int limit, int[] docIds, float[] values) {
// reimplemented here to ensure applySV can be inlined
int matches = 0;
for (int i = 0; i < limit; i++) {
float value = values[i];
if (applySV(value)) {
docIds[matches++] = docIds[i];
}
}
return matches;
}
}
private static final class DoubleRawValueBasedInPredicateEvaluator extends BaseRawValueBasedPredicateEvaluator {
final DoubleSet _matchingValues;
DoubleRawValueBasedInPredicateEvaluator(InPredicate inPredicate, DoubleSet matchingValues) {
super(inPredicate);
_matchingValues = matchingValues;
}
@Override
public DataType getDataType() {
return DataType.DOUBLE;
}
@Override
public boolean applySV(double value) {
return _matchingValues.contains(value);
}
@Override
public int applySV(int limit, int[] docIds, double[] values) {
// reimplemented here to ensure applySV can be inlined
int matches = 0;
for (int i = 0; i < limit; i++) {
double value = values[i];
if (applySV(value)) {
docIds[matches++] = docIds[i];
}
}
return matches;
}
}
private static final class BigDecimalRawValueBasedInPredicateEvaluator extends BaseRawValueBasedPredicateEvaluator {
// Note: BigDecimal's compareTo is not consistent with equals (e.g. compareTo(3.0, 3) returns zero when
// equals(3.0, 3) returns false).
// - HashSet implementation consider both hashCode() and equals() for the key.
// - TreeSet implementation on the other hand decides equality based on compareTo() method, and leaves it to
// the end user to ensure that maintained ordering is consistent with equals if it is to correctly implement
// the Set interface.
final TreeSet<BigDecimal> _matchingValues;
BigDecimalRawValueBasedInPredicateEvaluator(InPredicate inPredicate, TreeSet<BigDecimal> matchingValues) {
super(inPredicate);
_matchingValues = matchingValues;
}
@Override
public DataType getDataType() {
return DataType.BIG_DECIMAL;
}
@Override
public boolean applySV(BigDecimal value) {
return _matchingValues.contains(value);
}
}
private static final class StringRawValueBasedInPredicateEvaluator extends BaseRawValueBasedPredicateEvaluator {
final Set<String> _matchingValues;
StringRawValueBasedInPredicateEvaluator(InPredicate inPredicate, Set<String> matchingValues) {
super(inPredicate);
_matchingValues = matchingValues;
}
@Override
public DataType getDataType() {
return DataType.STRING;
}
@Override
public boolean applySV(String value) {
return _matchingValues.contains(value);
}
}
private static final class BytesRawValueBasedInPredicateEvaluator extends BaseRawValueBasedPredicateEvaluator {
final Set<ByteArray> _matchingValues;
BytesRawValueBasedInPredicateEvaluator(InPredicate inPredicate, Set<ByteArray> matchingValues) {
super(inPredicate);
_matchingValues = matchingValues;
}
@Override
public DataType getDataType() {
return DataType.BYTES;
}
@Override
public boolean applySV(byte[] value) {
return _matchingValues.contains(new ByteArray(value));
}
}
}