| /** |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| */ |
| package org.apache.pinot.core.operator.filter.predicate; |
| |
| import it.unimi.dsi.fastutil.doubles.DoubleOpenHashSet; |
| import it.unimi.dsi.fastutil.doubles.DoubleSet; |
| import it.unimi.dsi.fastutil.floats.FloatOpenHashSet; |
| import it.unimi.dsi.fastutil.floats.FloatSet; |
| import it.unimi.dsi.fastutil.ints.IntOpenHashSet; |
| import it.unimi.dsi.fastutil.ints.IntSet; |
| import it.unimi.dsi.fastutil.longs.LongOpenHashSet; |
| import it.unimi.dsi.fastutil.longs.LongSet; |
| import it.unimi.dsi.fastutil.objects.ObjectOpenHashSet; |
| import java.math.BigDecimal; |
| import java.util.Arrays; |
| import java.util.List; |
| import java.util.Set; |
| import java.util.TreeSet; |
| import org.apache.pinot.common.request.context.predicate.NotInPredicate; |
| import org.apache.pinot.common.utils.HashUtil; |
| import org.apache.pinot.segment.spi.index.reader.Dictionary; |
| import org.apache.pinot.spi.data.FieldSpec.DataType; |
| import org.apache.pinot.spi.utils.ByteArray; |
| |
| |
| /** |
| * Factory for NOT_IN predicate evaluators. |
| */ |
| public class NotInPredicateEvaluatorFactory { |
| private NotInPredicateEvaluatorFactory() { |
| } |
| |
| /** |
| * Create a new instance of dictionary based NOT_IN predicate evaluator. |
| * |
| * @param notInPredicate NOT_IN predicate to evaluate |
| * @param dictionary Dictionary for the column |
| * @param dataType Data type for the column |
| * @return Dictionary based NOT_IN predicate evaluator |
| */ |
| public static BaseDictionaryBasedPredicateEvaluator newDictionaryBasedEvaluator(NotInPredicate notInPredicate, |
| Dictionary dictionary, DataType dataType) { |
| return new DictionaryBasedNotInPredicateEvaluator(notInPredicate, dictionary, dataType); |
| } |
| |
| /** |
| * Create a new instance of raw value based NOT_IN predicate evaluator. |
| * |
| * @param notInPredicate NOT_IN predicate to evaluate |
| * @param dataType Data type for the column |
| * @return Raw value based NOT_IN predicate evaluator |
| */ |
| public static BaseRawValueBasedPredicateEvaluator newRawValueBasedEvaluator(NotInPredicate notInPredicate, |
| DataType dataType) { |
| switch (dataType) { |
| case INT: { |
| int[] intValues = notInPredicate.getIntValues(); |
| IntSet nonMatchingValues = new IntOpenHashSet(HashUtil.getMinHashSetSize(intValues.length)); |
| for (int value : intValues) { |
| nonMatchingValues.add(value); |
| } |
| return new IntRawValueBasedNotInPredicateEvaluator(notInPredicate, nonMatchingValues); |
| } |
| case LONG: { |
| long[] longValues = notInPredicate.getLongValues(); |
| LongSet nonMatchingValues = new LongOpenHashSet(HashUtil.getMinHashSetSize(longValues.length)); |
| for (long value : longValues) { |
| nonMatchingValues.add(value); |
| } |
| return new LongRawValueBasedNotInPredicateEvaluator(notInPredicate, nonMatchingValues); |
| } |
| case FLOAT: { |
| float[] floatValues = notInPredicate.getFloatValues(); |
| FloatSet nonMatchingValues = new FloatOpenHashSet(HashUtil.getMinHashSetSize(floatValues.length)); |
| for (float value : floatValues) { |
| nonMatchingValues.add(value); |
| } |
| return new FloatRawValueBasedNotInPredicateEvaluator(notInPredicate, nonMatchingValues); |
| } |
| case DOUBLE: { |
| double[] doubleValues = notInPredicate.getDoubleValues(); |
| DoubleSet nonMatchingValues = new DoubleOpenHashSet(HashUtil.getMinHashSetSize(doubleValues.length)); |
| for (double value : doubleValues) { |
| nonMatchingValues.add(value); |
| } |
| return new DoubleRawValueBasedNotInPredicateEvaluator(notInPredicate, nonMatchingValues); |
| } |
| case BIG_DECIMAL: { |
| BigDecimal[] bigDecimalValues = notInPredicate.getBigDecimalValues(); |
| // NOTE: Use TreeSet because BigDecimal's compareTo() is not consistent with equals() |
| // E.g. compareTo(3.0, 3) returns 0 but equals(3.0, 3) returns false |
| TreeSet<BigDecimal> nonMatchingValues = new TreeSet<>(Arrays.asList(bigDecimalValues)); |
| return new BigDecimalRawValueBasedNotInPredicateEvaluator(notInPredicate, nonMatchingValues); |
| } |
| case BOOLEAN: { |
| int[] booleanValues = notInPredicate.getBooleanValues(); |
| IntSet nonMatchingValues = new IntOpenHashSet(HashUtil.getMinHashSetSize(booleanValues.length)); |
| for (int value : booleanValues) { |
| nonMatchingValues.add(value); |
| } |
| return new IntRawValueBasedNotInPredicateEvaluator(notInPredicate, nonMatchingValues); |
| } |
| case TIMESTAMP: { |
| long[] timestampValues = notInPredicate.getTimestampValues(); |
| LongSet nonMatchingValues = new LongOpenHashSet(HashUtil.getMinHashSetSize(timestampValues.length)); |
| for (long value : timestampValues) { |
| nonMatchingValues.add(value); |
| } |
| return new LongRawValueBasedNotInPredicateEvaluator(notInPredicate, nonMatchingValues); |
| } |
| case STRING: { |
| List<String> stringValues = notInPredicate.getValues(); |
| Set<String> nonMatchingValues = new ObjectOpenHashSet<>(HashUtil.getMinHashSetSize(stringValues.size())); |
| // NOTE: Add value-by-value to avoid overhead |
| for (String value : stringValues) { |
| //noinspection UseBulkOperation |
| nonMatchingValues.add(value); |
| } |
| return new StringRawValueBasedNotInPredicateEvaluator(notInPredicate, nonMatchingValues); |
| } |
| case BYTES: { |
| ByteArray[] bytesValues = notInPredicate.getBytesValues(); |
| Set<ByteArray> nonMatchingValues = new ObjectOpenHashSet<>(HashUtil.getMinHashSetSize(bytesValues.length)); |
| // NOTE: Add value-by-value to avoid overhead |
| //noinspection ManualArrayToCollectionCopy |
| for (ByteArray value : bytesValues) { |
| //noinspection UseBulkOperation |
| nonMatchingValues.add(value); |
| } |
| return new BytesRawValueBasedNotInPredicateEvaluator(notInPredicate, nonMatchingValues); |
| } |
| default: |
| throw new IllegalStateException("Unsupported data type: " + dataType); |
| } |
| } |
| |
| public static final class DictionaryBasedNotInPredicateEvaluator extends BaseDictionaryBasedPredicateEvaluator { |
| final IntSet _nonMatchingDictIdSet; |
| final int _numNonMatchingDictIds; |
| final Dictionary _dictionary; |
| int[] _matchingDictIds; |
| int[] _nonMatchingDictIds; |
| |
| DictionaryBasedNotInPredicateEvaluator(NotInPredicate notInPredicate, Dictionary dictionary, DataType dataType) { |
| super(notInPredicate); |
| _nonMatchingDictIdSet = PredicateUtils.getDictIdSet(notInPredicate, dictionary, dataType); |
| _numNonMatchingDictIds = _nonMatchingDictIdSet.size(); |
| if (_numNonMatchingDictIds == 0) { |
| _alwaysTrue = true; |
| } else if (dictionary.length() == _numNonMatchingDictIds) { |
| _alwaysFalse = true; |
| } |
| _dictionary = dictionary; |
| } |
| |
| @Override |
| public boolean applySV(int dictId) { |
| return !_nonMatchingDictIdSet.contains(dictId); |
| } |
| |
| @Override |
| public int applySV(int limit, int[] docIds, int[] values) { |
| // reimplemented here to ensure applySV can be inlined |
| int matches = 0; |
| for (int i = 0; i < limit; i++) { |
| int value = values[i]; |
| if (applySV(value)) { |
| docIds[matches++] = docIds[i]; |
| } |
| } |
| return matches; |
| } |
| |
| @Override |
| public int[] getMatchingDictIds() { |
| if (_matchingDictIds == null) { |
| int dictionarySize = _dictionary.length(); |
| _matchingDictIds = new int[dictionarySize - _numNonMatchingDictIds]; |
| int index = 0; |
| for (int dictId = 0; dictId < dictionarySize; dictId++) { |
| if (!_nonMatchingDictIdSet.contains(dictId)) { |
| _matchingDictIds[index++] = dictId; |
| } |
| } |
| } |
| return _matchingDictIds; |
| } |
| |
| @Override |
| public int getNumNonMatchingDictIds() { |
| return _numNonMatchingDictIds; |
| } |
| |
| @Override |
| public int[] getNonMatchingDictIds() { |
| if (_nonMatchingDictIds == null) { |
| _nonMatchingDictIds = _nonMatchingDictIdSet.toIntArray(); |
| } |
| return _nonMatchingDictIds; |
| } |
| } |
| |
| private static final class IntRawValueBasedNotInPredicateEvaluator extends BaseRawValueBasedPredicateEvaluator { |
| final IntSet _nonMatchingValues; |
| |
| IntRawValueBasedNotInPredicateEvaluator(NotInPredicate notInPredicate, IntSet nonMatchingValues) { |
| super(notInPredicate); |
| _nonMatchingValues = nonMatchingValues; |
| } |
| |
| @Override |
| public DataType getDataType() { |
| return DataType.INT; |
| } |
| |
| @Override |
| public boolean applySV(int value) { |
| return !_nonMatchingValues.contains(value); |
| } |
| |
| @Override |
| public int applySV(int limit, int[] docIds, int[] values) { |
| // reimplemented here to ensure applySV can be inlined |
| int matches = 0; |
| for (int i = 0; i < limit; i++) { |
| int value = values[i]; |
| if (applySV(value)) { |
| docIds[matches++] = docIds[i]; |
| } |
| } |
| return matches; |
| } |
| } |
| |
| private static final class LongRawValueBasedNotInPredicateEvaluator extends BaseRawValueBasedPredicateEvaluator { |
| final LongSet _nonMatchingValues; |
| |
| LongRawValueBasedNotInPredicateEvaluator(NotInPredicate notInPredicate, LongSet nonMatchingValues) { |
| super(notInPredicate); |
| _nonMatchingValues = nonMatchingValues; |
| } |
| |
| @Override |
| public DataType getDataType() { |
| return DataType.LONG; |
| } |
| |
| @Override |
| public boolean applySV(long value) { |
| return !_nonMatchingValues.contains(value); |
| } |
| |
| @Override |
| public int applySV(int limit, int[] docIds, long[] values) { |
| // reimplemented here to ensure applySV can be inlined |
| int matches = 0; |
| for (int i = 0; i < limit; i++) { |
| long value = values[i]; |
| if (applySV(value)) { |
| docIds[matches++] = docIds[i]; |
| } |
| } |
| return matches; |
| } |
| } |
| |
| private static final class FloatRawValueBasedNotInPredicateEvaluator extends BaseRawValueBasedPredicateEvaluator { |
| final FloatSet _nonMatchingValues; |
| |
| FloatRawValueBasedNotInPredicateEvaluator(NotInPredicate notInPredicate, FloatSet nonMatchingValues) { |
| super(notInPredicate); |
| _nonMatchingValues = nonMatchingValues; |
| } |
| |
| @Override |
| public DataType getDataType() { |
| return DataType.FLOAT; |
| } |
| |
| @Override |
| public boolean applySV(float value) { |
| return !_nonMatchingValues.contains(value); |
| } |
| |
| @Override |
| public int applySV(int limit, int[] docIds, float[] values) { |
| // reimplemented here to ensure applySV can be inlined |
| int matches = 0; |
| for (int i = 0; i < limit; i++) { |
| float value = values[i]; |
| if (applySV(value)) { |
| docIds[matches++] = docIds[i]; |
| } |
| } |
| return matches; |
| } |
| } |
| |
| private static final class DoubleRawValueBasedNotInPredicateEvaluator extends BaseRawValueBasedPredicateEvaluator { |
| final DoubleSet _nonMatchingValues; |
| |
| DoubleRawValueBasedNotInPredicateEvaluator(NotInPredicate notInPredicate, DoubleSet nonMatchingValues) { |
| super(notInPredicate); |
| _nonMatchingValues = nonMatchingValues; |
| } |
| |
| @Override |
| public DataType getDataType() { |
| return DataType.DOUBLE; |
| } |
| |
| @Override |
| public boolean applySV(double value) { |
| return !_nonMatchingValues.contains(value); |
| } |
| |
| @Override |
| public int applySV(int limit, int[] docIds, double[] values) { |
| // reimplemented here to ensure applySV can be inlined |
| int matches = 0; |
| for (int i = 0; i < limit; i++) { |
| double value = values[i]; |
| if (applySV(value)) { |
| docIds[matches++] = docIds[i]; |
| } |
| } |
| return matches; |
| } |
| } |
| |
| private static final class BigDecimalRawValueBasedNotInPredicateEvaluator |
| extends BaseRawValueBasedPredicateEvaluator { |
| // See: BigDecimalRawValueBasedInPredicateEvaluator. |
| final TreeSet<BigDecimal> _nonMatchingValues; |
| |
| BigDecimalRawValueBasedNotInPredicateEvaluator(NotInPredicate notInPredicate, |
| TreeSet<BigDecimal> nonMatchingValues) { |
| super(notInPredicate); |
| _nonMatchingValues = nonMatchingValues; |
| } |
| |
| @Override |
| public DataType getDataType() { |
| return DataType.BIG_DECIMAL; |
| } |
| |
| @Override |
| public boolean applySV(BigDecimal value) { |
| return !_nonMatchingValues.contains(value); |
| } |
| } |
| |
| private static final class StringRawValueBasedNotInPredicateEvaluator extends BaseRawValueBasedPredicateEvaluator { |
| final Set<String> _nonMatchingValues; |
| |
| StringRawValueBasedNotInPredicateEvaluator(NotInPredicate notInPredicate, Set<String> nonMatchingValues) { |
| super(notInPredicate); |
| _nonMatchingValues = nonMatchingValues; |
| } |
| |
| @Override |
| public DataType getDataType() { |
| return DataType.STRING; |
| } |
| |
| @Override |
| public boolean applySV(String value) { |
| return !_nonMatchingValues.contains(value); |
| } |
| } |
| |
| private static final class BytesRawValueBasedNotInPredicateEvaluator extends BaseRawValueBasedPredicateEvaluator { |
| final Set<ByteArray> _nonMatchingValues; |
| |
| BytesRawValueBasedNotInPredicateEvaluator(NotInPredicate notInPredicate, Set<ByteArray> nonMatchingValues) { |
| super(notInPredicate); |
| _nonMatchingValues = nonMatchingValues; |
| } |
| |
| @Override |
| public DataType getDataType() { |
| return DataType.BYTES; |
| } |
| |
| @Override |
| public boolean applySV(byte[] value) { |
| return !_nonMatchingValues.contains(new ByteArray(value)); |
| } |
| } |
| } |